{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "#!/usr/bin/env python\n",
    "# coding: utf-8\n",
    "\n",
    "# 首先读取单个数据，查看文件结构\n",
    "\n",
    "# In[1]:\n",
    "\n",
    "\n",
    "\n",
    "# In[1]:\n",
    "\n",
    "\n",
    "import pandas as pd\n",
    "import numpy as np"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "     渔船ID             x             y    速度   方向           time type\n",
      "0       1  6.076254e+06  5.061743e+06  3.99  278  1110 11:40:21   拖网\n",
      "1       1  6.077380e+06  5.061819e+06  4.26  257  1110 11:33:03   拖网\n",
      "2       1  6.079838e+06  5.062075e+06  3.67  257  1110 11:10:22   拖网\n",
      "3       1  6.081068e+06  5.062258e+06  3.78  256  1110 11:00:22   拖网\n",
      "4       1  6.082194e+06  5.062335e+06  3.99  251  1110 10:50:22   拖网\n",
      "..    ...           ...           ...   ...  ...            ...  ...\n",
      "380     1  6.102450e+06  5.112760e+06  0.05    0  1107 12:40:34   拖网\n",
      "381     1  6.102450e+06  5.112760e+06  0.00    0  1107 12:30:34   拖网\n",
      "382     1  6.102450e+06  5.112760e+06  0.00    0  1107 12:20:34   拖网\n",
      "383     1  6.102450e+06  5.112760e+06  0.05    0  1107 12:10:34   拖网\n",
      "384     1  6.102450e+06  5.112760e+06  0.00    0  1107 12:00:34   拖网\n",
      "\n",
      "[385 rows x 7 columns]\n",
      "                  x             y          速度          方向\n",
      "count  3.850000e+02  3.850000e+02  385.000000  385.000000\n",
      "mean   6.091460e+06  5.094050e+06    1.607922   56.153247\n",
      "std    1.654339e+04  2.676404e+04    2.412688   91.449382\n",
      "min    6.049472e+06  5.042857e+06    0.000000    0.000000\n",
      "25%    6.074562e+06  5.061049e+06    0.000000    0.000000\n",
      "50%    6.102450e+06  5.112760e+06    0.050000    0.000000\n",
      "75%    6.102450e+06  5.112760e+06    3.450000   77.000000\n",
      "max    6.102450e+06  5.112874e+06   10.470000  336.000000\n"
     ]
    }
   ],
   "source": [
    "# In[2]:\n",
    "\n",
    "\n",
    "data = pd.read_csv('./hy_round1_train_20200102/1.csv')\n",
    "print(data)\n",
    "\n",
    "\n",
    "# 每个文件包含7列，分别是ID，x，y,速度，方向,时间以及类型。\n",
    "#\n",
    "#\n",
    "# 其中x，y，速度，方向都是数值类型，我们可以对这些数值进行一个统计\n",
    "# 用以观察这一系列数据的范围、方差等等。\n",
    "\n",
    "# In[3]:\n",
    "\n",
    "\n",
    "print(data[[\"x\", \"y\", \"速度\", '方向']].describe())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 接下来，需要对数据进行特征的提取。在此我们主要对x，y，速度，方向四个属性的统计特征进行提取\n",
    "\n",
    "# In[4]:\n",
    "\n",
    "\n",
    "#这里采用x，y，速度，方向四个属性中的最大值，最小值，平均值以及标准差作为每个渔船的特征\n",
    "def get_feature(df, train_mode=True):\n",
    "    \"\"\"\n",
    "    test_mode: 用于区分训练数据和测试数据，训练数据存在label而测试数据不存在label\n",
    "    \"\"\"\n",
    "    df = df.iloc[::-1]\n",
    "\n",
    "    if train_mode:\n",
    "        df['type'] = df['type'].map({'拖网': 0, '围网': 1, '刺网': 2})# 将label由str类型转换为int类型\n",
    "        label = np.array(df['type'].iloc[0])\n",
    "        df = df.drop(['type'], axis=1)\n",
    "    else:\n",
    "        label = None\n",
    "    features = np.array([df['x'].std(), df['x'].mean(), df['x'].max(), df['x'].min(),\n",
    "                df['y'].std(), df['y'].mean(), df['y'].max(), df['y'].min(),\n",
    "                df['速度'].mean(), df['速度'].std(), df['速度'].max(), df['速度'].min(),\n",
    "                df['方向'].mean(), df['方向'].std(), df['方向'].max(), df['方向'].min(),\n",
    "                ])\n",
    "    return features, label"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(array([1.65433944e+04, 6.09146039e+06, 6.10244988e+06, 6.04947187e+06,\n",
       "        2.67640427e+04, 5.09405027e+06, 5.11287399e+06, 5.04285734e+06,\n",
       "        1.60792208e+00, 2.41268811e+00, 1.04700000e+01, 0.00000000e+00,\n",
       "        5.61532468e+01, 9.14493820e+01, 3.36000000e+02, 0.00000000e+00]),\n",
       " array(0, dtype=int64))"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 查看单个渔船的特征，以及标签属性\n",
    "\n",
    "# In[5]:\n",
    "\n",
    "\n",
    "get_feature(data)\n",
    "\n",
    "\n",
    "# 接下来读取全部的文件，并提取其中的特征以及标签，然后将这些数据装到一个list中\n",
    "\n",
    "# In[6]:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "def load_data():\n",
    "    path = './hy_round1_train_20200102'\n",
    "    train_file = os.listdir(path)\n",
    "    X = []\n",
    "    Y = []\n",
    "    for i, each in enumerate(train_file):\n",
    "        if not i % 1000:  #每读1000个文件输出一次\n",
    "            print(i)\n",
    "        each_path = os.path.join(path, each)\n",
    "        df = pd.read_csv(each_path)\n",
    "        x, y = get_feature(df)\n",
    "        X.append(x)\n",
    "        Y.append(y)\n",
    "    X = np.array(X)\n",
    "    Y = np.array(Y)\n",
    "    return X, Y"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# In[7]:\n",
    "\n",
    "\n",
    "\n",
    "\n",
    "# 全部一次读取大概需要花1分钟（由于硬件原因，每台电脑花费的时间不尽相同）\n",
    "#\n",
    "# 每次加载数据都需要不少的时间得到两个numpy数组，而且每次加载都需要重复的计算特征，同时我们需要的也仅是特征属性，其他内容不需要被读取。\n",
    "#\n",
    "# 因此为了快速加载数据，我们可以把加载好的数据存为.npy格式。\n",
    "#\n",
    "# .npy文件是numpy专用的二进制文件,能够快速的读取。\n",
    "\n",
    "# In[8]:\n",
    "\n",
    "X,Y = load_data()\n",
    "np.save(\"./npy/x_array.npy\", X)\n",
    "np.save(\"./npy/y_array.npy\", Y)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "X = np.load(\"./npy/x_array.npy\")\n",
    "Y = np.load(\"./npy/y_array.npy\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "# In[9]:\n",
    "\n",
    "\n",
    "# 通过.npy文件读取只需要用不到1秒的时间\n",
    "#\n",
    "# 基本数据读取已经完成，接下来就是训练过程\n",
    "#\n",
    "# 首先需要把数据分为训练集与测试两个部分，一般采用8:2或7:3比例\n",
    "\n",
    "# 使用sklearn决策树对部分数据进行分类\n",
    "#\n",
    "# （在深度学习中跑一次模型可能会花很长的时间，这时候先使用部分数据既可以方便程序的debug，也能对模型运行时间有个大致的概念）\n",
    "\n",
    "# In[10]:\n",
    "\n",
    "\n",
    "from sklearn import tree\n",
    "from sklearn.metrics import f1_score, accuracy_score, plot_confusion_matrix\n",
    "from sklearn.model_selection import train_test_split\n",
    "\n",
    "X_train, X_test, y_train, y_test = train_test_split(X[:3000], Y[:3000], test_size=0.2, random_state=0)\n",
    "clf=tree.DecisionTreeClassifier(random_state=2021)\n",
    "clf=clf.fit(X_train,y_train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "训练集准确率 1.0\n",
      "训练集f1_score 1.0\n",
      "测试集准确率 0.8333333333333334\n",
      "测试集f1_score 0.7762393089613528\n"
     ]
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAATgAAAEGCAYAAADxD4m3AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8QVMy6AAAACXBIWXMAAAsTAAALEwEAmpwYAAAgMklEQVR4nO3deZhU1bnv8e+vmkEEFAEhBFFQWxFHDHHAWZM45CROMQdNvJqokByNejNqTm70qCTmKFGTE89xjHrjhDMxKgqOeFEQQpAhKIoKgiiIgiDK8N4/ajc2SFdX0V29q3b/Pjz76apVe3i7HnhZe6291lJEYGaWRbm0AzAzKxcnODPLLCc4M8ssJzgzyywnODPLrDZpB1Cf2nYMbdYl7TAq1p47dU87hIonlHYIFe3NN+eyeNHiJn1JNV1rI1atKGrf+Gj+6Ig4qinXa4rKSnCbdaH9oGFph1GxnnzM301j2uQq6q90xTlkvyOafI5YtaLof6crn74o1f+V/bfBzEojQNVRU3aCM7PS1dSkHUFRnODMrERyDc7MMkqAquMBDCc4MytdzjU4M8sq36KaWTbJt6hmllECapzgzCyTXIMzsyxzG5yZZZIfEzGzTPNjImaWSRLkPFTLzLLKbXBmllm+RTWzbPJjImaWVZ4PzswyzQnOzLJJnvDSzDKqim5Rq6Ol0MwqSNLJUMxW6CzSZpImSPqHpOmS/iMp7yrpCUmvJj+3qnfMhZJmS5ol6cjGInWCM7PS5VTcVtgnwOERsSewF3CUpP2AC4CxEVELjE3eI2kAMATYFTgKuFZSwXtlJzgzK51U3FZA5H2UvG2bbAEcC9yalN8KHJe8Pha4KyI+iYg5wGxgn0LXcIIzs9LUDdUqZoPukl6qtw1d/1SqkTQFeBd4IiJeBHpGxAKA5GePZPfewNx6h89LyhrkTgYzK13xIxkWRcSghj6MiDXAXpK6AA9I2q3AuTZ20Sh0cdfgzKx0zdDJUF9EfAA8Tb5tbaGkXgDJz3eT3eYBfeodtg0wv9B5neDMrDR1j4k0sQ1O0tZJzQ1JHYCvAP8ERgGnJbudBjyUvB4FDJHUXlI/oBaYUOgavkU1sxIJNc9zcL2AW5Oe0BwwMiIeljQeGCnpDOAt4CSAiJguaSQwA1gNnJ3c4jbICc7MStJcz/lGxFRg4EbKFwNHNHDMcGB4sddwgjOz0ghqaqpjJEOrTXDt29bwt6u+Rfu2NdTU5Bj17Gwuv+0Ffnn6fhwzeAfWrg3e+2AFZ1/xBO8sXs5Jh+/Mj779pXXH77p9dw754R1Me21Rir9Fy3n7vY8474qneHfJCnIS3z1mF848bnf+89aJjB7/BsqJ7l06cPVPDuUL3TqmHW4qzhsxlideeJPuXTrw7A0nryu/8cGp3DTqZdrUiK/s05eLzhqcYpTNo5luUctOEQV7WZt2cuko4BqgBrgxIi4vtH+uc+9oP2hY2eLZUMfN2rJ85Sra1OR49OqTuPDaZ5j15vssW/EpAEOP25P+23Xjx9c8ud5xA/p14/ZLvsHAU29psVgB5j/Wct/NhhYuXs7C91ewR+3WfLTiU4760f3c/Osj6dW9I507tgPgxgdf5tW3lvC7cw9OLc42ufT+zx4/dT4dO7TlnP8csy7BjZsyj6vunMQdl/4L7dvV8N6SFWy91eapxXjIfkcwedKUJmWnNj13jE4nX1nUvh9ec/ykQo+JlFvZelGThsM/AUcDA4CTk6EWFWP5ylUAtG2To22bHBGxLrkBdOzQltjIYzYnHrYz9z05q8XirAQ9u3Vkj9qtAei0eTt27NOFBYuXr0tuAB+vXF01/7OXw/57fJEunduvV3bLw9M491/3pn27/IiiNJNbc5JU1Ja2cv53tw8wOyJeB5B0F/mhFjPKeM2S5HLi6WtPpl/vLbnpoalM+udCAH71vf0Z8tVdWLr8E77x0/s/d9zxh9bynV8/3NLhVoy57yxj2muL
2Xvn/APml98ygXvGvMIWHdtx7+++kXJ0leW1eR/wwrT5/PbPL9C+XRsuHjqYgTv3TDusJquE5FWMcj4HV/Kwipa2dm1w8A/uYNchN7F3/57s0rcbAJf9eTy7nXIz9zw5i7OO3XO9Y77Uvycff7KamW8sTiPk1C3/eBVnXvY4lwzbf13t7YLT92HSX77LCYfVcvNfp6UcYWVZsyb4cNknPPqHb3HRWYM567LRlLNZqEUU+QhcJeTAcia4ooZVSBpaN04tVi0vYzgNW7r8U8b9422O+PJ265XfO3YW3zxoh/XKTjhsZ+578pWWDK9irFq9hjMvfZwTDqvlmAO3/9znxx+2I4+Mm5NCZJWr19ad+PqBOyCJvfv3RDmx+MOVaYfVJELkaorb0lbOBFfUsIqIuD4iBkXEILVtud63blt2YIukBrJZuxoO3bsPr761hO17d1m3z1GDt+eVuUvWvZfg2IN35L6nW1f7G0BE8JOrnqF22y4MO3GPdeWvv/3hutejX3iTHft0SSG6ynX04H48N2UekL9dXbVqLd223CzlqJrObXAwEahNhlS8TX4ep1PKeL2SfKFrR679xVepyeXICR545lVGvziHWy/6OrXbdGFtwNyFS/nx1Z/1oA7eozfzF33EmwuWphh5OiZMf4d7x77KLn278pV/uxeAC0/fhztH/5PX5n1ATqJ3z0787kfp9aCmbdhvHuf5qW/z/ocr2fOUW/j5qftwypG7cN6IJzn4rDtp2zbHH392REX8w2+SCrn9LEa5HxM5Bria/GMiNydPITeopR8TqTZpPiZSLdJ8TKQaNMdjIm171Ub3068uat93Lv+XVB8TKevfhoh4BHiknNcws5aVH6pVHVU4/3dnZiXLeWV7M8ukKmqDc4IzsxIJuQZnZllURcuiOsGZWencyWBm2eQ2ODPLslyuOpZzcYIzs5KIUlYNTJcTnJmVRrgX1cyyy21wZpZRlTFTSDGc4MysJH4OzsyyS9XTi1odUZpZRWmOKcsl9ZH0lKSZkqZLOi8pv1jS25KmJNsx9Y65UNJsSbMkHdlYnK7BmVnJmqkXdTXwk4iYLKkzMEnSE8lnV0XEemsTJqvyDQF2Bb4IjJG0U0SsaegCrsGZWUnq2uCaWoOLiAURMTl5vQyYSeGFqY4F7oqITyJiDjCb/Op9DXKCM7PSCHJSURvQvW5RqWQbutFTSn2BgcCLSdE5kqZKulnSVklZySv1+RbVzEqkUia8XNTYlOWSOgH3AedHxFJJ/w1cSn4VvkuBEcD3KXKlvvqc4MysJKL5RjJIaks+ud0eEfcDRMTCep/fANStsl7USn31+RbVzEqj5lk2UPkdbgJmRsTv65X3qrfb8UDdauKjgCGS2ier9dUCEwpdwzU4MytZMz3oewBwKvCypClJ2S+BkyXtRf728w1gGEBETJc0EphBvgf27EI9qOAEZ2aboDmGakXEODbertbgSnzJ0qMFlx+tzwnOzErm2UTMLJOkknpRU+UEZ2Yly1XJaHsnODMrWZXkNyc4MyuNPKOvmWWZJ7w0s8yqkvzmBGdmJZLI1VTHICgnODMriacs30R77NSdMY+emXYYFWvCe9Ma36mVO7DnwLRDaBXcBmdmmeUEZ2bZJK9sb2YZJXAng5llV5XcoTrBmVmJipjMslI4wZlZyTxUy8wyyc/BmVmm+RbVzLJJUONbVDPLovwtasHlSCuGE5yZlaxK7lCd4MysdDnX4Mwsi8TG1/qrRA0mOEl/JL/w6kZFxLllicjMKpugJtf0GpykPsBtwBeAtcD1EXGNpK7A3UBf8gs/fzsiliTHXAicAawBzo2I0YWuUagG91JTfwEzy6ZmaoNbDfwkIiZL6gxMkvQEcDowNiIul3QBcAHwC0kDgCHArsAXgTGSdiq0un2DCS4ibq3/XlLHiFje5F/JzKqaiGZpg4uIBcCC5PUySTOB3sCxwKHJbrcCTwO/SMrviohPgDmSZgP7AOMbukajUwJI
2l/SDGBm8n5PSddu4u9kZhmgIreizyf1BQYCLwI9k+RXlwR7JLv1BubWO2xeUtagYuY8uRo4ElicXPAfwMHFh25mWZNTFLUB3SW9VG8buuG5JHUC7gPOj4ilBS67sZxZsCpZVC9qRMzdYGhGg/e8ZpZtUkltcIsiYlDD51Jb8snt9oi4PyleKKlXRCyQ1At4NymfB/Spd/g2wPxCFy+mBjdX0mAgJLWT9FOS21Uza51qFEVthShfa7oJmBkRv6/30SjgtOT1acBD9cqHSGovqR9QC0wodI1ianA/AK4hf6/7NjAaOLuI48wso5ppqNYBwKnAy5KmJGW/BC4HRko6A3gLOAkgIqZLGgnMIN8De3ahHlQoIsFFxCLgO5v6G5hZtojmWZMhIsbRcF/EEQ0cMxwYXuw1iulF3V7SXyW9J+ldSQ9J2r7YC5hZ9khR1Ja2Ytrg7gBGAr3IP1x3D3BnOYMyswqWrKpVzJa2YhKcIuL/RsTqZPsLjXTNmll2iSh6S1uhsahdk5dPJcMl7iKf2P4V+FsLxGZmFao5xqK2hEKdDJPIJ7S6iuawep8FcGm5gjKzylb188FFRL+WDMTMqkO+F7X6a3DrSNoNGABsVlcWEbeVKygzq2xVUoFrPMFJuoj8yP4BwCPA0cA48vM4mVlrU9pQrVQV04v6LfIP3b0TEd8D9gTalzUqM6tYIt/JUMyWtmJuUT+OiLWSVkvagvzA10w96Dv/vY84/8pneG/JCnISpxzdnzOO243pry3mwj+O45NVa6ipyTH87MEM3LlH4yfMiD/e8Cov/X0JW27Rlj9cPhCA519cxF0PvMW8+R9zxcV7sOP2nQGY8vIH3DbyDVavDtq0EacP6cseu3ZJMfqWd+6IsTz+wht079KBcTecAsBvb3mBR8fPISfRvUsH/vizI+jVrVPKkTZdrgIeASlGMTW4lyR1AW4g37M6mUYGuAJIujkZ+TCtaSGWX01Njv9z1r48df1JPHTVN7n14Rm88uYSht80gf/9nb0Z/acT+Ol3v8Rvbmr0186Uww/qwa9/PmC9sm232ZwLzuvPgJ23WK98i85t+NWPd+EPvx3IeUNrufq6V1sy1Iow5Kv9ufs331iv7JyT9ubZ607m6f8Zwtf27cuVf5mYUnTNq25Gkca2tBUzFvXfkpf/I+kxYIuImFrEuW8B/osqaKvr2XVzenbdHIBOm7djxz5deGfxciRYtuJTAJau+JSe3TqmGWaL27X/lix8b+V6ZX16b77Rfbfv+1mtZNttNmfVqrWsWrWWtm2L+T80Gwbv0Zu33ll/OrPOHdute71i5aqqWRG+EFEZw7CKUehB370LfRYRkwudOCKeTWbprCpzFy5j+muLGbhzDy4eth/f/dVjXHbjBNZG8OCIbzR+AmP8xMX0265jq0puhQz/83jufmIWW3Rsx4NXHJ92OE1XIcOwilGoBjeiwGcBHN4cASQzfA4F2KZPwdmHy275x6sYdtkYLh62H507tuOK217ioqH7ccyB/fjrs6/zs6uf487fHpNqjJXurXkruPXuN7l4g1vb1uzfv7c///69/bn6zpe4cdRULvhf+6YdUpNVSw2uwf9iI+KwAluzJLfkOtdHxKCIGNRt627NddqSrVq9lqGXjeG4w3bk6APyzzjfO+ZVjj6gLwD/clA/psx6L7X4qsGi9z/h8mtmcv6wWnr17JB2OBXnxMN34uHnXks7jCYTzTPhZUvwPQQQEfzs6mep7dOFoSfsvq68Z7fNeeHlBQA8P2U+/Xpv0dApWr2Plq/msitn8N1vb8cuO/l7qvPa2x+se/3Y+DnU9tkqvWCaUa7ILW1e2R6YOH0h942dTf++W3Hk2flp4X9x2pf53bkHcfF141m9JmjfrobLzz0o5Uhb1og/zWLazA9Z+tFqzjh3IkNO2JbOndpww22v8+GyVVw6Yib9tuvIxT/flUeeWMCChSsZ+eA8Rj44D4CLfz6ALlu2a+Qq2XHWb0bz
/NS3ef/Dlex+yp/5xan7MmbiG8ye+wG5nNimR2dGnHdo2mE2i2q5RVVEeQKVdCf5ERDdgYXARRFxU6Fj9vrSnjHm+UfLEk8WTFrkpTAac2DPgWmHUNEO2u9wJk+a0qQugt4D+sew228oat+L9j54UqFFZ8qtmKFaIj9l+fYRcYmkbYEvRETBh8Ii4uRmitHMKky1DLYv5jb5WmB/oC5hLQP+VLaIzKziNffCz+VSTBvcvhGxt6S/A0TEEkmtp2HFzNYjVcY402IUk+BWSaohmaZc0tbA2rJGZWYVrRJqZ8UoJsH9AXgA6CFpOPnZRX5V1qjMrKJVSxtcMWNRb5c0ifyUSQKOiwh355m1UpXSvlaMYtZF3RZYAfwVGAUsT8rMrJXKKYraGrOxWYckXSzpbUlTku2Yep9dKGm2pFmSjmzs/MXcov6Nzxaf2QzoB8wCdi3iWDPLoGa8Rb2Fjc86dFVEXFm/QNIAYAj53PNFYIyknSJiTUMnL+YWdff675NZRoY1sLuZZZxovmFYJc46dCxwV0R8AsyRNBvYBxjf0AElx5lMk/TlUo8zs4xQ/lGRYrYmOEfS1OQWtm4Ab29gbr195iVlDSpmJMOP673NAXsDnlbDrBUroWbUXdJL9d5fHxHXN3LMf5Nfd7lu/eURwPfZeN9GwSxaTBtc53qvV5Nvk7uviOPMLINKnNF3UaljUSNi4bprSTcADydv5wF96u26DTC/0LkKJrjkAd9OEfGzUgI0s2wr51RIknpFxILk7fFAXQ/rKOAOSb8n38lQSyPrwxSasrxNRKwuNHW5mbVOzdWLWn/WIUnzgIuAQyXtRf728w2STs2ImC5pJDCD/N3k2YV6UKFwDW4C+fa2KZJGAfcAy+s+jIj7N+1XMrNqJpovwTUw61CD06pFxHBgeLHnL6YNriuwmPwaDHXPwwXgBGfWSlXL4mCFElyPpAd1Gp8ltjrVMRDNzMqiWhZ+LpTgaoBObELXrJllV6Us6lyMQgluQURc0mKRmFnVqJL8VjDBVcvvYGYtqG7ZwGpQKMEd0WJRmFkVKW6mkErQYIKLiPdbMhAzqx7VcnvndVHNrGTVsi6qE5yZlaQ5p0sqNyc4MytZrkqeE3GCM7PSSMgJzsyyqJoWnXGCM7OSqUpSnBOcmZWsSu5QneDMrHQ51+DMLIvy88E5wZUsh+hQs1naYVSsQ3qVNLV9q7RgxcLGd2rFVsXqZjlPleS3ykpwZlYd3MlgZpnlGpyZZZKSP9XACc7MSiOoqZIqnBOcmZWsOtKbE5yZlUjgsahmll3Vkd6qZ1onM6sgSmYUaWwr4jw3S3pX0rR6ZV0lPSHp1eTnVvU+u1DSbEmzJB3Z2Pmd4MysZCpyK8ItwFEblF0AjI2IWmBs8h5JA4AhwK7JMddKqil0cic4MytJflUtFbU1JiKeBTZc/+VY4Nbk9a3AcfXK74qITyJiDjAb2KfQ+Z3gzKxEKvrPJuoZEQsAkp89kvLewNx6+81LyhrkTgYzK1kJnajdJb1U7/31EXH9pl52I2UFV79xgjOzkpVQO1sUEaXOErFQUq+IWCCpF/BuUj4P6FNvv22A+YVO5FtUMyuJVPy2iUYBpyWvTwMeqlc+RFJ7Sf2AWmBCoRO5BmdmJWuusaiS7gQOJX8rOw+4CLgcGCnpDOAt4CSAiJguaSQwA1gNnB0Rawqd3wnOzErWXBNeRsTJDXx0RAP7DweGF3t+JzgzK4kXfjazTPNYVDPLqOpZGdUJzsxKVh3pzQnOzDaBVB2tcE5wZlYy1+DMLJPyLXDVkeKc4MysdO5FNbOsqo705gRnZiXzYyJmllVqvqFa5eYEZ2abwAnOzDLKvahmlknV0wLnBGdmm8JtcNXl3BFjefyFN+jepQPjbjgFgN/e8gKPjp9DTqJ7lw788WdH0Ktbp5QjTd+rc5fw/cseWff+zQVLufC0/fjhiQNTjCp9XzlzFB07tCWXE21qxD2/P5LHxr3Fn+6cxuvz
lnL3lV9jt9quaYfZDJq0oEyLKluCk9QHuA34ArCW/GIT15Trek015Kv9OeObu3P2f45ZV3bOSXtz4en7AXD9A//gyr9MZMR5h6UVYsWo7bMVz133HQDWrFnLgCE38fUDd0g5qspwy/DD2WqL9uve1263JX+48EAuvnZiilE1v1af4MhPKfyTiJgsqTMwSdITETGjjNfcZIP36M1b7yxdr6xzx3brXq9Yuapq5sBqSc/8fS59v7gl2/bcIu1QKtIOfbZMO4SyqJZ/C2VLcMl6hnVrGy6TNJP8GoYVmeAaMvzP47n7iVls0bEdD15xfNrhVJz7n3qFEw/bOe0wKoIQZ/76aST49pE78O2jdkw7pDKqjgTXInOeSOoLDARebInrNad//97+TL3jdL51+E7cOGpq2uFUlE9XreHR8a9z3CFZ/odcvNt/9xXuu/pIrrvoEO58ZDYvTXu38YOqlIrc0lb2BCepE3AfcH5ELN3I50MlvSTppUWLFpc7nE124uE78fBzr6UdRkUZM+EN9qztQY+tOqYdSkXo0a0DAN26bMYR+/Vm6qvvpxxRedTNJlLGle2bTVkTnKS25JPb7RFx/8b2iYjrI2JQRAzq3r1bOcMp2Wtvf7Du9WPj51DbZ6v0gqlA9z71CicetlPaYVSEFStXs3zFqnWv/9+Ud6jdNpvtbyCk4ra0lbMXVcBNwMyI+H25rtNczvrNaJ6f+jbvf7iS3U/5M784dV/GTHyD2XM/IJcT2/TozIjzDk07zIqxYuUqnp70Fledf3jaoVSExR+s5NzfjANg9Zq1fP2Q7TjoS70YM34ew6+fxPsffsIPL3mG/ttvxQ3/cWiqsTaHSqidFUMRUZ4TSwcCzwEvk39MBOCXEfFIQ8fs/aW94rkXnixLPFlQk6tJO4SKt2DFwrRDqGjfPOQEXp48rUnZafeBu8WDz9xb1L47brnLpIgY1JTrNUU5e1HHURntjGbWnNR8AxkkvQEsA9YAqyNikKSuwN1AX+AN4NsRsWRTzl8dK0eYWYVp1n7UwyJir3o1vQuAsRFRC4xN3m8SJzgzK1mZe1GPBW5NXt8KHLepJ3KCM7OSqLRe1O51j4El29ANThfA45Im1fusZzJQoG7AQI9NjdWD7c2sZCXUzhY10slwQETMl9QDeELSP5se3WdcgzOzkjXXLWpEzE9+vgs8AOwDLJTUCyD5uclDQpzgzKx0zdDHIKljMhEHkjoCXwOmAaOA05LdTgMe2tQwfYtqZiVrpgd9ewIPJG11bYA7IuIxSROBkZLOAN4CTtrUCzjBmVnJmiPBRcTrwJ4bKV8MHNHkC+AEZ2YlqutFrQZOcGZWsmoZi+oEZ2Ylq4705gRnZpvCt6hmllW+RTWzTBKQc4Izs0yqlAUXiuAEZ2Ylqoz1ForhBGdmJauWBOexqGaWWa7BmVnJPJLBzDLJvahmlm2uwZlZNrkX1cwyrDrSmxOcmW0C1+DMLLvcBmdmWeReVDPLNtfgzCyrqiO9OcGZWcn8mIiZZZgTnJllklQ9Y1EVEWnHsI6k94A3046jnu7AorSDqGD+fhpXad/RdhGxdVNOIOkx8r9XMRZFxFFNuV5TVFSCqzSSXoqIQWnHUan8/TTO31G6PB+cmWWWE5yZZZYTXGHXpx1AhfP30zh/RylyG5yZZZZrcGaWWU5wZpZZTnAbIekoSbMkzZZ0QdrxVBpJN0t6V9K0tGOpRJL6SHpK0kxJ0yWdl3ZMrZXb4DYgqQZ4BfgqMA+YCJwcETNSDayCSDoY+Ai4LSJ2SzueSiOpF9ArIiZL6gxMAo7z36GW5xrc5+0DzI6I1yPiU+Au4NiUY6ooEfEs8H7acVSqiFgQEZOT18uAmUDvdKNqnZzgPq83MLfe+3n4L6dtIkl9gYHAiymH0io5wX3exkYR+z7eSiapE3AfcH5ELE07ntbICe7z5gF96r3fBpifUixWpSS1JZ/cbo+I+9OOp7Vygvu8iUCtpH6S2gFDgFEp
x2RVRPm5hG4CZkbE79OOpzVzgttARKwGzgFGk28cHhkR09ONqrJIuhMYD+wsaZ6kM9KOqcIcAJwKHC5pSrIdk3ZQrZEfEzGzzHINzswyywnOzDLLCc7MMssJzswyywnOzDLLCa6KSFqTPHIwTdI9kjZvwrlukfSt5PWNkgYU2PdQSYM34RpvSPrc6ksNlW+wz0clXutiST8tNUbLNie46vJxROyVzODxKfCD+h8mM6GULCLObGSmi0OBkhOcWdqc4KrXc8COSe3qKUl3AC9LqpF0haSJkqZKGgb5p+sl/ZekGZL+BvSoO5GkpyUNSl4fJWmypH9IGpsMFv8B8L+T2uNBkraWdF9yjYmSDkiO7SbpcUl/l3QdGx/Xux5JD0qalMybNnSDz0YksYyVtHVStoOkx5JjnpPUv1m+Tcskr2xfhSS1AY4GHkuK9gF2i4g5SZL4MCK+LKk98Lykx8nPaLEzsDvQE5gB3LzBebcGbgAOTs7VNSLel/Q/wEcRcWWy3x3AVRExTtK25Ed97AJcBIyLiEskfR1YL2E14PvJNToAEyXdFxGLgY7A5Ij4iaRfJ+c+h/wiLj+IiFcl7QtcCxy+CV+jtQJOcNWlg6QpyevnyI93HAxMiIg5SfnXgD3q2teALYFa4GDgzohYA8yX9ORGzr8f8GzduSKioTnfvgIMyA+5BGCLZGLHg4ETkmP/JmlJEb/TuZKOT173SWJdDKwF7k7K/wLcn8zOMRi4p9612xdxDWulnOCqy8cRsVf9guQf+vL6RcCPImL0BvsdQ+PTPqmIfSDftLF/RHy8kViKHvsn6VDyyXL/iFgh6WlgswZ2j+S6H2z4HZg1xG1w2TMa+GEyXQ+SdpLUEXgWGJK00fUCDtvIseOBQyT1S47tmpQvAzrX2+9x8reLJPvtlbx8FvhOUnY0sFUjsW4JLEmSW3/yNcg6OaCuFnoK+VvfpcAcSScl15CkPRu5hrViTnDZcyP59rXJyi8Kcx35mvoDwKvAy8B/A89seGBEvEe+3ex+Sf/gs1vEvwLH13UyAOcCg5JOjBl81pv7H8DBkiaTv1V+q5FYHwPaSJoKXAq8UO+z5cCukiaRb2O7JCn/DnBGEt90PJ28FeDZRMwss1yDM7PMcoIzs8xygjOzzHKCM7PMcoIzs8xygjOzzHKCM7PM+v8dSBm6h1ecAAAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<Figure size 432x288 with 2 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "# 对模型进行评价，在测试集对模型进行测试，计算准确率，f1_score\n",
    "\n",
    "# In[11]:\n",
    "\n",
    "\n",
    "def eval(clf,X_train, X_test, y_train, y_test):\n",
    "    predicted = clf.predict(X_train)  # 模型预测\n",
    "    accuracy = accuracy_score(y_train, predicted)\n",
    "    print(\"训练集准确率\", accuracy)\n",
    "    f1 = f1_score(y_train, predicted,average='macro')\n",
    "    print(\"训练集f1_score\", f1)\n",
    "\n",
    "    predicted = clf.predict(X_test)\n",
    "    accuracy = accuracy_score(y_test, predicted)\n",
    "    print(\"测试集准确率\", accuracy)\n",
    "    f1 = f1_score(y_test, predicted,average='macro')\n",
    "    print(\"测试集f1_score\", f1)\n",
    "\n",
    "    plot_confusion_matrix(clf, X_test, y_test,cmap=\"GnBu\")\n",
    "\n",
    "\n",
    "# In[12]:\n",
    "\n",
    "\n",
    "eval(clf,X_train, X_test, y_train, y_test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "训练集准确率 1.0\n",
      "训练集f1_score 1.0\n",
      "测试集准确率 0.845\n",
      "测试集f1_score 0.7967069927353312\n"
     ]
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAATgAAAEJCAYAAAAAWTtiAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8QVMy6AAAACXBIWXMAAAsTAAALEwEAmpwYAAAk6UlEQVR4nO3deZwU1bn/8c+3B9mGfR9ZBHGAAAYUJIIRF6IgmkASSeCqwUjcgku2mwv5mRgXcrnJ1WgWNcQYyVVANBoxEhZRQ0xUBMQFkDiCwgiy7/vA8/ujCm2B6emC6anunuftq17Tfbrq1DP9wmdO1alzjswM55zLR4m4A3DOuUzxBOecy1ue4JxzecsTnHMub3mCc87lLU9wzrm85QnOORcbSd+VtFjS25ImS6otqYmk2ZLeDX82Ttp/rKQSScskDaywfn8OzjkXB0mtgZeArma2W9JUYDrQFdhkZuMljQEam9l/SeoKTAb6ACcCzwGdzOxAeeeokfHfIgKdUGiq3SjuMLJWj07N4w4h6ynuALLcBx+sYuOGjcf1NRU0KTbbvyutfW3H6plmNijFLjWAOpL2A3WB1cBY4Nzw84nAi8B/AUOAKWa2F1ghqYQg2b2cqvKsodqNqNX72rjDyFovzLwu7hCyXoEK4g4hq51z5oDjrsP270r7/9M9L97arNx6zD6U9L/ASmA3MMvMZklqaWZrwn3WSGoRHtIaeCWpitKwrFx+D845F40AKb0Nmkman7Rd83E1wb21IUAHgkvOQkmXV3Dmw6W8x5ZVLTjnXI4oSLulvMHMepfz2ReAFWa2HkDSk0A/YK2korD1VgSsC/cvBdomHd+G4JK2XN6Cc85FlGbrTRXe6lsJnCmpriQBA4ClwDRgZLjPSODp8PU0YLikWpI6AMXAvFQn8Baccy4aATr+tpGZvSrpCWAhUAa8DkwA6gFTJY0iSILDwv0Xhz2tS8L9R6fqQQVPcM65Y5GonP5qM7sVuPWw4r0Erbmj7T8OGJdu/Z7gnHPRVXz5mRU8wTnnIlKlXKJWBU9wzrloBBR4gnPO5SVvwTnn8pnfg3PO5aVKekykKniCc85FV0mPiWSaJzjnXDQSJHJjUgNPcM656PwenHMub/klqnMuP/ljIs65fHVoPrgc4AnOORedJzjnXH5SlAkvY+UJzjkXjV+iOufyl3cyOOfymT8m4pzLW36J6pzLSzk0VCs3LqSdc9klofS2FCR1lrQoadsm6TuSmkiaLend8GfjpGPGSiqRtEzSwArDrIRf1TlX3SiR3paCmS0zs55m1hPoBewCngLGAHPMrBiYE75HUldgONANGATcJyllU9ITnHMummgr26drAPCemX1AsNr9xLB8IjA0fD0EmGJme81sBVAC9ElVqd+Dc85FJJR+8momaX7S+wlmNuEo+w0HJoevW5rZGoBwdfsWYXlr4JWkY0rDsnJ5gnPORRLxOd8NZtY7ZX1STeBLwNg0Tn04S3WAJzjnXDSCgoJKfUzkImChma0N36+VVBS23oqAdWF5KdA26bg2wOpUFVfrBHf9V0/jiou6gRlLVmxk9C9mU6d2DR66ZTDtWjZg5dptfPOO6WzdsZe2Levz6kPfoGTVZgDmL/2I7937fMy/QdU64xuPUK9OTQoSoqAgwczffJXbf/8ys175gJonJDipqAH3fP88GtarFXeosTlw4CAX3PA4Rc0KefSOS9i8bQ9Xj5vJqrXbaduyPg/eMpBG9WvHHeZxi3CJmo4RfHJ5CjANGAmMD38+nVQ+SdLdwIlAMTAvVcUZ7WSQNCjszi2RNCaT54qqqGkh1w7twfnfnky/qx8lUSC+cl4nvju8N3NfX0XvKycy9/VVfHf4J63r91dvof91k+h/3aRql9wOeeLnX+S5+4cx8zdfBaD/6W14ccLXeP6Br9GxdSN+PeX1mCOM14Sn3qRTu4+fauBXjy2k/2ltePXhy+l/Wht+9djCGKOr
HJXZxyCpLnAB8GRS8XjgAknvhp+NBzCzxcBUYAkwAxhtZgdS1Z+xBBd23/6WoPnZFRgRdvNmjRoFCWrXqkFBQtStdQIfbdzJRf06MnnWEgAmz1rC4LM6xhxldju3V1tqhIsAn/6ZlqzesCPmiOKzev0Onpv3PpcN+uSf+YyXV/D1C7oA8PULuvC3f62IK7xKJSmtrSJmtsvMmprZ1qSyjWY2wMyKw5+bkj4bZ2Ydzayzmf2tovozeYnaBygxs+UAkqYQdPMuyeA507Zm405+/fhC3pp0FXv2lvHCgpW8sGAlLRrXZe2mXQCs3bSL5o3qfHxMu1YN+fsDI9i+cx/j/vgyL7+d8vI/7wgx/EfPIuCKi7tyxeBP/72aMvMdvnRO9f2DcMv9L/GTb/Vjx+79H5et37yLlk0LAWjZtJANW3bHFV6lquRL1IzJZIJrDaxKel8KfC6D54ukYb1aDO53Mj0vf5itO/by8E8G87UBncvdf+2mXZx62UNs3raHHsUtePS2S+j7rUfYvmtfFUYdr2m/HEqr8H/Sr4/5K6e0bUTfU08E4J5JCygoEF89vzjmKOMx65X3adaoDj06teCfb3wYdziZFfkRt/hkMsGl1aUr6RrgGgBqNcxgOJ927ult+eCjbWzcGvxFfealEvp0O5F1m3fRsknQimvZpC7rw7+4+/YfYN/+4HL/jXfXsWLNVjq2acSif68r9xz5plXYEmnWqA4XndWeRe+so++pJzJ19jKem7eSqeMvyZm/7JVt3uI1zHxlBXNe+4A9+8rYsWs/14+fTfPGdVm7cSctmxayduNOmiVdEeQqIRKV24uaMZnsZEirS9fMJphZbzPrrRMKMxjOYcGt207vz7SiTq0gx59zWluWrdzEjJeXM+LC4NJrxIVd+du/3gOgacM6JMKxdScVNeDk1o14f83Wo1eeh3bt2c+OsLW6a89+/r6glM7tm/D8ayv5zdRFPPzTQdStfULMUcbnllF9eWPSlSz4v28w4UcD+XzP1tw/5gIGntmex2a/A8Bjs99hUN8OMUdaOSrrHlymZbIF9xpQLKkD8CHBk8r/kcHzRbLgnbVMm1vCi/eP4MCBg7xZsp6Jz75NYZ0T+OMtg7l8UDdK123nyjueBaDfZ1szduSZHDhwkAMHje/f8zxbtu+N+beoOus37+aq22YCUHbgIF8+7xTOP6Mdfa+cxL79Bxg+9q8AnN6lJT+/uX+coWaVm4b34uo7Z/DojKW0aVGPB28ZFHdIxy+HLlFllvJB4OOrXBoM3AMUAA+Z2bhU+yfqt7Zava/NWDy5bs3M6+IOIesVpB57Xe2dc+YAFi5YdFzp6YSiYmt25T1p7fvR+EsWVDSSIZMy+qCvmU0HpmfyHM65qhU8B5cbTbhqPZLBOXdsEj5luXMuL+XQPThPcM65iIS8Beecy0c5tCyqJzjnXHTeyeCcy09+D845l88SidxYzsUTnHMuEpEzC9t7gnPORSS8F9U5l7/8HpxzLk9lx0wh6fAE55yLJJeeg8uNrhDnXPZQ0IuazlZhVVIjSU9IekfSUkl9JTWRNFvSu+HPxkn7jw0XsVomaWBF9XuCc85FVlmragH3AjPMrAvQA1gKjAHmmFkxMCd8T7ho1XCgGzAIuC9c3KpcnuCcc5EpobS2lHVIDYD+wB8AzGyfmW0hWJxqYrjbRGBo+HoIMMXM9prZCqCEYHGrcnmCc85FEnFd1GaS5idt1yRVdTKwHvijpNclPSipEGhpZmsAwp8twv2PtpBV61SxeieDcy4aQSL9XoYNKWb0rQGcDtxoZq9KupfwcrT8Mx8h5ZTk3oJzzkUkEon0tgqUAqVm9mr4/gmChLdWUhFA+HNd0v4VLmSVzBOccy4SUTn34MzsI2CVpEMLEg8gWBh+GjAyLBsJPB2+ngYMl1QrXMyqGJiX6hx+ieqci0aVOl3SjcCjkmoCy4FvEjS8pkoaBawEhgGY2WJJUwmSYBkw2swOpKrcE5xz
LrLKym9mtgg42j26AeXsPw5IuTpfMk9wzrnIfKiWcy5v+Wwizrm8JKXVQ5oVPME55yKL8BxcrDzBOeciy5H85gnOOReNfEZf51w+815U51zeypH85gnOOReRRKIgN0Z5eoJzzkWSS1OWZ1WC69GpGc/PuDbuMLJWydZ34w4h63Vr3C3uELJc5WQmvwfnnMtbnuCcc/lJvrK9cy5PCbyTwTmXv3LkCtUTnHMuIvnK9s65POZDtZxzeSmXnoPLjTuFzrmsovAytaItjXrel/SWpEWS5odlTSTNlvRu+LNx0v5jJZVIWiZpYEX1e4JzzkUjKEgorS1N55lZz6T1U8cAc8ysGJgTvkdSV2A40A0YBNwnqSBVxZ7gnHORBJeoltZ2jIYAE8PXE4GhSeVTzGyvma0ASoA+qSryBOeci0xKb0uDAbMkLZB0TVjW0szWAIQ/W4TlrYFVSceWhmXl8k4G51xkifRbZ80O3VsLTTCzCUnvzzKz1ZJaALMlvZOirqOlzJSBeIJzzkUiIg3Z35B0b+0IZrY6/LlO0lMEl5xrJRWZ2RpJRcC6cPdSoG3S4W2A1alOXm6Ck/RrUmRHM7spVcXOuTwlKEgc8/21T6qRCoGEmW0PX18I3A5MA0YC48OfT4eHTAMmSbobOBEoBualOkeqFtz8FJ8556qxSnoOriXwVPg4SQ1gkpnNkPQaMFXSKGAlMAzAzBZLmgosAcqA0WZ2INUJyk1wZjYx+b2kQjPbeTy/jXMu9wmLcg+uXGa2HOhxlPKNwIByjhkHjEv3HBX2okrqK2kJsDR830PSfemewDmXf5TmFrd0HhO5BxgIbAQwszeA/hmMyTmX5RKytLa4pdWLamarDht2kfK61zmXvyI84xa7dBLcKkn9AJNUE7iJ8HLVOVc9FWRB6ywd6VyiXgeMJnhi+EOgZ/jeOVdNZXioVqWpsAVnZhuAy6ogFudcDhC5syZDOr2oJ0t6RtJ6SeskPS3p5KoIzjmXnXKlBZfOJeokYCpQRPD08OPA5EwG5ZzLYuGqWulscUsnwcnM/s/MysLtESoY4Oqcy1/C0t7ilmosapPw5QuSxgBTCBLb14FnqyA251yWqoyxqFUhVSfDAoKEdqiheW3SZwbckamgnHPZLeefgzOzDlUZiHMuNwS9qLnfgvuYpO5AV6D2oTIz+1OmgnLOZbccacBVnOAk3QqcS5DgpgMXAS8BnuCcq45yaKhWOr2olxJMXfKRmX2TYHqTWhmNyjmXtUTQyZDOFrd0LlF3m9lBSWWSGhBMH5x3D/r2+caj1Ktbk0RC1CgQM379VRYv38iYX81l554y2rSsx29/OID6hTXjDrVKrN2wh3G/XcymLfuQxJe+cCLDBrfjhZfX8tDjK/jgw51M+NkZdOnYAID9ZQf5xYR3WPbeNpQQN1/ZidO6Na7gLPmjdN12vv3zWazdtItEQowc3J3rvtITgAl/eYMHn36DgoIEF36uPbdd/fl4g60EiSx4BCQd6SS4+ZIaAb8n6FndQQXTBANIegi4BFhnZt2PJ8iq8vj/XELThnU+fv+DX/6dn1x9Jn0/eyKTZ77D/U+8wQ9HnhFjhFWnoECMvqKYzic3YNfuMkaNmUfvzzahQ9t6jPvBqfxiwqfXBnnmuQ8BmHjXmWzeuo8f/GwRv//vM0hkw9OeVaBGQYI7rj2bHsUt2L5rH+d/ewrn9mrL+s27+Nu/lvOP3/0HtWrWYP3mXXGHWiny5hLVzL5tZlvM7AHgAmBkeKlakYcJFmfNWe99uIUzTy0CoP/pbXj2n8tjjqjqNGtci84nB62zunVq0L51IRs27aV9m0LanVh4xP7vl+6kV/egxda4YU3qFdbgneXbqjTmOLVqWkiP4mB1u/p1a9KpXWPWbNjJQ8+8xc3De1GrZtCWaN64bpxhVgqR3jCtrB6qJen0wzegCVAjfJ2Smc0FNlVirBkliRE/ms7AG/7MI9OXAND5pCbMfOUDAP46dzmr11fP
GdvXrNvNv1dsp+spDcvd55T29Xlp/gbKDhxk9brd/Hv5dtZt2FuFUWaPlR9t482S9fTq0pL3Srfw8lur+cKNj3HJ955g4bK1cYd3/HJoqFaqS9S7UnxmwPmVEUC42Os1AG3atqmMKo/J03cPoVXTQjZs2c3wsX/llLaNuPt75/Dj+//FLx9dwIVnnkTNGtVvnexde8q45a63uOnKThTWLf+fy+Dzini/dCdXj3mNVs1r071zQwoKsuBfeBXbsXsfI29/lp9d358GhbUoO3iQrTv2MvtXX2PhsrVcdeffeP1PI1GuXOOVozJbZ5IKCBa5+tDMLglHUT0GtAfeB75mZpvDfccCowgm3b3JzGamqjvVg77nVUr0FQgXgZ0AcFqvHrG1aVs1DS67mjWqw6B+HXh92Xquv7QHU352MQDvlW5hzryVcYUXi7Kyg9xy11tccHYrzvlci5T71ihIcNOVnT5+f/0t82lTVCfFEflnf9kBRt42nUvP78wXzz4FgBOb1eOSz3dEEr26tCIh2Lh1N80a5e6lqqj0CS9vJphEt0H4fgwwx8zGh8NExwD/JakrMBzoRjDxx3OSOqVaWav6NUmOYtee/ezYte/j139fWEqX9o3ZsGU3AAcPGvdOXsgVF3eNM8wqZWaMf2Ap7VsXMvySdhXuv2fvAXbvCf6dvfbmRgoKRIc29TIdZtYwM266aw6d2jVh9KWf3MG5uF9H5r5eCkBJ6Wb2lR38VEdWrkqkuVVEUhvgYuDBpOIhwKFV/SYCQ5PKp5jZXjNbAZQQLBRdLl/ZHli/eTejbg9aumUHjC+fdwrn9W7Hg395i4efWQzARWd1YPiFneMMs0q9tWwrM+d+xMnt6vHN/3wVgGtGdGR/2UHueejfbNm2jx+OX8Qp7etz9/87jc1b9/H9cYtIJKBZk1rcckP1+WMA8OriNTz23Dt07dCU/tdOAuDHV/XjskFdufGu5+h39SPUrFHAff95Qc5fnkKlXqLeA/wQqJ9U1tLM1gCEq9sfunxoDbyStF9pWFaujCU4SZMJRkA0k1QK3Gpmf8jU+Y7HSUUNeO7+YUeUf2voqXxr6KkxRBS/z3ZpxD+mHnVpSvr3OfJytahFHSbd2zfTYWWtM7ufyKbZNx31s9+NGVjF0WRWxCUBm0lKXkR+QnhbCkmHHiNbIOncNE99uJSZNp2hWiKYsvxkM7tdUjuglZmlfBbOzEZUVLdzLjdFGGy/wcx6l/PZWcCXJA0mGOfeQNIjwFpJRWHrrYhgcAEELba2Sce3AVanjDONAO8D+gKHEtZ24LdpHOecy1OVsfCzmY01szZm1p6g8+B5M7scmAaMDHcbCTwdvp4GDJdUS1IHoJgKBh2kc4n6OTM7XdLrYVCbw+UDnXPVkJTxcabjgamSRgErgWEAZrZY0lRgCVAGjE7VgwrpJbj94XMqBiCpOXDwOIJ3zuW4yu4mMbMXgRfD1xsJJvg42n7jgHHp1ptOgvsV8BTQQtI4gtlFbkn3BM65/JM3E16a2aOSFhBkVAFDzcxXtneumorYixqrdHpR2wG7gGeSy8ysej3W75z7WN604AhW0Dq0+ExtoAOwjGC4hHOuGsqbBGdmn3rSNZxJ5NpydnfO5TmRO2M8I49kMLOFkqrHrI/OuSOpcmcTyaR07sF9L+ltAjgdWJ+xiJxzWS+fWnDJg2DLCO7J/Tkz4Tjnst2hGX1zQcoEFz7gW8/M/rOK4nHO5YCcb8FJqmFmZelMT+6cq17yoRd1HsH9tkWSpgGPAx8vSmBmT2Y4NudcFhL5keAOaQJsJFiD4dDzcAZ4gnOumsqVOTtTJbgWYQ/q23yS2A7JjfTtnMuIfFj4uQCoxzHMoumcy19SfrTg1pjZ7VUWiXMuZ+RIfkuZ4HLld3DOVaEMLBuYMakS3NFXHHHOVXOW+72oZrapKgNxzuWOXLm883VRnXOR5cpQrVwZceGcyxKHpks63pXtJdWWNE/SG5IWS7otLG8iabak
d8OfjZOOGSupRNIySRUuOOsJzjkXWUJKa6vAXuB8M+sB9AQGSToTGAPMMbNiYE74HkldCZYX7AYMAu4Lx8uXH+fx/JLOuWpIQmluqVhgR/j2hHAzYAgwMSyfCAwNXw8BppjZXjNbAZQAfVKdwxOccy6SdBd9DtNbM0nzk7ZrPlWXVCBpEcHq9bPN7FWgpZmtAQh/tgh3bw2sSjq8NCwrl3cyOOciU/r9qBvMrHd5H4YLN/eU1Ah4SlL3lKc9ShWpTu4tOOdcZIeGa1W0pcvMthAs/DwIWCupKDiPighadxC02NomHdYGWJ2qXk9wzrnIEiitLRVJzcOWG5LqAF8A3gGmASPD3UYCT4evpwHDJdWS1AEoJpjWrVx+ieqciySYD65SHvUtAiaGPaEJYKqZ/VXSy8BUSaOAlcAwADNbLGkqsIRg+YTR4SVuubIqwSWUoE6N2nGHkbW6N0l1e8IBvL/9g7hDyGr7Du6rlHoqI7+Z2ZvAaUcp30g5Q0XNbBwwLt1zZFWCc87lhgidDLHyBOeciywf5oNzzrkjKPwvF3iCc85FIyjIkSacJzjnXGS5kd48wTnnIhJUOM40W3iCc85FlhvpzROcc+4YeAvOOZe3ciO9eYJzzkUUrKqVGynOE5xzLiJ/Ds45l8dypAHnCc45F5234JxzeSnqZJZx8gTnnIvMW3DOubxVSRNeZpwnOOdcJIcWfs4FnuCcc5HlykiGXEnEzrmsEXFl1PJqkdpKekHSUkmLJd0cljeRNFvSu+HPxknHjJVUImmZpIEVReoJzjkX2fGnNyBYOOb7ZvYZ4ExgtKSuwBhgjpkVA3PC94SfDQe6ESwveF+4YE25PME55yKTEmltqZjZGjNbGL7eDiwlWKl+CDAx3G0iMDR8PQSYYmZ7zWwFUAL0SXUOT3DOucgqqQX3SX1Se4IVtl4FWprZGgiSINAi3K01sCrpsNKwrFzeyeCciyRIXmmnr2aS5ie9n2BmEz5Vn1QP+DPwHTPblqID42gfWKqTe4JzzkWXfi/qBjPrXX41OoEguT1qZk+GxWslFZnZGklFwLqwvBRom3R4G2B1qpP7JapzLrLKuERV0FT7A7DUzO5O+mgaMDJ8PRJ4Oql8uKRakjoAxcC8VOfwFpxzLqKod9jKdRZwBfCWpEVh2Y+A8cBUSaOAlcAwADNbLGkqsISgB3a0mR1IdQJPcM65aFQ5Q7XM7CXKz5QDyjlmHDAu3XN4gnPOHYPcGMngCc45F5nPJuKcy0uVdgeuCniCc85FlyOD7T3BAaXrtnP9/8xi3eadJCRGXtyd675yGpu37eGqO6ezcu022rVswB9/PJhG9WvHHW4s/Ds60k9/vZC58z+iScNaPPGr4J74A1OW8uTsD2jcoCYAN1zelbN7tWL//oPc+cAilpRsQQn44ahT6d29eZzhHwdfdAZJbYE/Aa2AgwRPMN+bqfMdjxoFCe687mx6FLdg+659nHf9ZM7t1Y5JM5fS/7S2fHfEGfxy8mv8csp8brv683GHGwv/jo70xfPb8fXBJ/Pjexd8qvzyL3bkG0OLP1X25Oz3AXj83vPZtGUvN9zxLx75xbkkErmRKA6XKwkukw/6ljdTQNZp1bSQHsXBcLf6dWvSqV0T1mzYwd/+9R4jLgxCHnFhV6b/8704w4yVf0dH6tWtGQ3rn5DWvstXbafPqUGLrUmjWtQvPIElJVsyGF1mSUpri1vGElyKmQKy2sqPtvFmyTp6dWnFus27aNW0EAj+B1+/ZXfM0WUH/45SmzJ9OV/7zvP89NcL2bZjHwCdOjTkxXlrKDtwkA/X7mTJe1v4aOOumCM9HpU93D4zquQe3GEzBWStHbv38Y3bnuW/v30ODQprxR1OVvLvKLVhgzpw9bAuSHDfpKXc/ce3+emNpzNkQDtWlG7nsh+8SFHzuvTo0pSCRO6OlIw/daUn4wnu8JkCjvL5NcA1AG3btT384yqzv+wAI3/6LMMGdOaLZ58CQIvG
dflo405aNS3ko407ad6oTmzxZQP/jirWtNEnHSxfufAkbrrzFSC4h/mDq079+LORY+bS7sTCKo+vMkScTSRWGf0TUs5MAZ9iZhPMrLeZ9W7evGkmwymXmXHj/z5Hp5OaMPrS0z8uH9T3ZCbPWgLA5FlLuKhfx1jiywb+HaVn/aY9H79+/pU1dDypAQC795axe08ZAK8sWkdBgejYtkEsMR6/9O6/ZcM9uEz2opY3U0DWeeXt1Tz23Dt07dCUs699FIAfX9WP7w7vzTfvnM4jMxbTpkV9Hv7xxTFHGh//jo405q7XWLB4A1u27WPgt2Zw3fAuLHh7A8tWbEOCohZ1ueW6ngBs3rqXb9/2MglB86Z1uPPmXvEGf5xypQUns5TzxR17xdLngX8AbxE8JgLwIzObXt4xvXqfZv989cWMxOOqh/e3fxB3CFnt0vNG8Pbri48rO516Wnf7y9+fSGvfUxp+ZkGq+eAyLWMtuApmCnDO5SrlzEAGH8ngnDsWuZHhPME55yLLlXtwnuCcc5GI7OghTYcnOOdcZN6Cc87lrVxJcLk7VsQ5F59KGooq6SFJ6yS9nVTWRNJsSe+GPxsnfTZWUomkZZIGVlS/JzjnXGRK8780PAwMOqxsDDDHzIqBOeF7wtmIhgPdwmPuk1SQqnJPcM65yCorwZnZXGDTYcVDgInh64nA0KTyKWa218xWACVAn1T1e4JzzkVyqBc1g2NRW5rZGgimXQNahOWtgVVJ+5VSwRRs3sngnIssQidDM0nzk95PMLMJx3zaI6Uca+oJzjkXWYS22YZjGIu6VlKRma2RVASsC8tLgeQ51doAq1NV5JeozrnopPS2YzMNGBm+Hgk8nVQ+XFItSR2AYmBeqoq8Beeci6yynoOTNBk4l+BSthS4FRgPTJU0ClgJDAMws8WSpgJLCNZ8GW1mB1LV7wnOOReJgEQlJTgzG1HORwPK2X8cMC7d+j3BOeeiyY71ZNLiCc45F5Ev/Oycy2O5kuC8F9U5l7e8Beeci8zng3PO5aXK7EXNNE9wzrnovAXnnMtP3ovqnMtjuZHePME5546Bt+Ccc/nL78E55/KR96I65/Kbt+Ccc/kqN9KbJzjnXGT+mIhzLo95gnPO5aVgNvLcSHAyS7koTZWStB74IO44kjQDNsQdRBbz76di2fYdnWRmzY+nAkkzCH6vdGwws8MXdq4yWZXgso2k+cewIlC14d9Pxfw7ipfPB+ecy1ue4JxzecsTXGrHugJ3deHfT8X8O4qR34NzzuUtb8E55/KWJ7ijkDRI0jJJJZLGxB1PtpH0kKR1kt6OO5ZsJKmtpBckLZW0WNLNccdUXfkl6mEkFQD/Bi4ASoHXgBFmtiTWwLKIpP7ADuBPZtY97niyjaQioMjMFkqqDywAhvq/oarnLbgj9QFKzGy5me0DpgBDYo4pq5jZXGBT3HFkKzNbY2YLw9fbgaVA63ijqp48wR2pNbAq6X0p/o/THSNJ7YHTgFdjDqVa8gR3pKMNsvPreBeZpHrAn4HvmNm2uOOpjjzBHakUaJv0vg2wOqZYXI6SdAJBcnvUzJ6MO57qyhPckV4DiiV1kFQTGA5Mizkml0MUTLXxB2Cpmd0ddzzVmSe4w5hZGXADMJPg5vBUM1scb1TZRdJk4GWgs6RSSaPijinLnAVcAZwvaVG4DY47qOrIHxNxzuUtb8E55/KWJzjnXN7yBOecy1ue4JxzecsTnHMub3mCyyGSDoSPHLwt6XFJdY+jroclXRq+flBS1xT7niup3zGc431JRyxOUl75YfvsiHiun0r6QdQYXX7zBJdbdptZz3AGj33AdckfhjOhRGZm36pgpotzgcgJzrm4eYLLXf8ATglbVy9ImgS8JalA0i8kvSbpTUnXQvB0vaTfSFoi6VmgxaGKJL0oqXf4epCkhZLekDQnHCx+HfDdsPV4tqTmkv4cnuM1SWeFxzaVNEvS65J+x9HH9X6KpL9IWhDOm3bNYZ/dFcYy
R1LzsKyjpBnhMf+Q1KVSvk2Xl3zh5xwkqQZwETAjLOoDdDezFWGS2GpmZ0iqBfxT0iyCGS06A6cCLYElwEOH1dsc+D3QP6yriZltkvQAsMPM/jfcbxLwSzN7SVI7glEfnwFuBV4ys9slXQx8KmGV46rwHHWA1yT92cw2AoXAQjP7vqSfhHXfQLDGwXVm9q6kzwH3Aecfw9foqgFPcLmljqRF4et/EIx37AfMM7MVYfmFwGcP3V8DGgLFQH9gspkdAFZLev4o9Z8JzD1Ul5mVN+fbF4CuSaubNwgnduwPfCU89llJm9P4nW6S9OXwddsw1o3AQeCxsPwR4Mlwdo5+wONJ566VxjlcNeUJLrfsNrOeyQXh/+g7k4uAG81s5mH7DabiaZ+Uxj4Q3Nroa2a7jxJL2mP/JJ1LkCz7mtkuSS8CtcvZ3cLzbjn8O3CuPH4PLv/MBK4Pp+tBUidJhcBcYHh4j64IOO8ox74MnCOpQ3hsk7B8O1A/ab9ZBJeLhPv1DF/OBS4Lyy4CGlcQa0Ngc5jcuhC0IA9JAIdaof9BcOm7DVghaVh4DknqUcE5XDXmCS7/PEhwf22hgkVhfkfQUn8KeBd4C7gf+PvhB5rZeoL7Zk9KeoNPLhGfAb58qJMBuAnoHXZiLOGT3tzbgP6SFhJcKq+sINYZQA1JbwJ3AK8kfbYT6CZpAcE9ttvD8suAUWF8i/Hp5F0KPpuIcy5veQvOOZe3PME55/KWJzjnXN7yBOecy1ue4JxzecsTnHMub3mCc87lLU9wzrm89f8B5qA0M3TmlwYAAAAASUVORK5CYII=\n",
      "text/plain": [
       "<Figure size 432x288 with 2 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "# 提高准确率的方法有多种，最直接的方式就是加大数据量。刚刚只用了部分数据进行训练，现在使用全部数据构建模型\n",
    "\n",
    "# In[13]:\n",
    "\n",
    "\n",
    "# Hold out 20% of the samples for testing; the split seed is fixed for repeatability\n",
    "X_train, X_test, y_train, y_test = train_test_split(\n",
    "    X, Y, test_size=0.2, random_state=0\n",
    ")\n",
    "# Baseline model: a decision tree with default hyper-parameters and a fixed seed\n",
    "# (fit returns the estimator itself, so the call can be chained)\n",
    "clf = tree.DecisionTreeClassifier(random_state=2021).fit(X_train, y_train)\n",
    "# NOTE(review): eval is called with five arguments, so it is presumably the notebook's\n",
    "# own evaluation helper shadowing the builtin - confirm against its definition\n",
    "eval(clf, X_train, X_test, y_train, y_test)\n",
    "\n",
    "\n",
    "# 决策树可视化，在本地生成svg文件（需要安装graphviz）\n",
    "\n",
    "# In[14]:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.tree import DecisionTreeClassifier, export_graphviz\n",
    "import graphviz\n",
    "\n",
    "# Export the fitted tree as DOT text (out_file=None returns the string instead of writing a file)\n",
    "dot_data = export_graphviz(clf, out_file=None)\n",
    "# Wrap the DOT source and render it to disk as SVG under the base name \"demo\"\n",
    "graph = graphviz.Source(dot_data, format=\"svg\")\n",
    "graph.render(filename=\"demo\")\n",
    "# Keep the Source object as the last expression so the notebook displays it\n",
    "graph"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Best set score:0.79\n",
      "Best parameters:{'class_weight': 'balanced', 'criterion': 'gini', 'max_depth': 15, 'random_state': 2021}\n",
      "Test set score:0.83\n",
      "训练集准确率 0.9635714285714285\n",
      "训练集f1_score 0.9590186045562786\n",
      "测试集准确率 0.865\n",
      "测试集f1_score 0.8290673697280343\n"
     ]
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAATgAAAEGCAYAAADxD4m3AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8QVMy6AAAACXBIWXMAAAsTAAALEwEAmpwYAAAjkklEQVR4nO3deZhU1ZnH8e+vu6HZd0FkEcRGBTcEETfWGHBJ0Ew0GE1IhgQdTdSMTgIxGRNnmDhxdxKiRo0kEQhRUWOMgriiEQRE2UJoRaEFWUXZobvf+aNuY4Hd1XWhqm/V7ffjc5+qOnXvuS/14Mu599xzjswM55yLo4KoA3DOuWzxBOeciy1PcM652PIE55yLLU9wzrnYKoo6gGRq0NTUqFXUYeSsE0vaRR1CziuQog4hp33wwWo2bdx0SD9SYZsSs7070trXtq15zsxGHMr5DkVuJbhGrSjud0XUYeSsmc+MiTqEnNeoqFHUIeS0QQOGHXIdtndH2v+f7nrppkj/Vc6pBOecywMC8qSl7AnOORdeYWHUEaTFE5xzLiR5C845F1MClB8PYHiCc86FV+AtOOdcXPklqnMunuSXqM65mBJQ6AnOORdL3oJzzsWZ34NzzsWSPybinIs1f0zEORdLEhT4UC3nXFz5PTjnXGz5JapzLp78MRHnXFz5fHDOuVjzBOeciyf5hJfOuZjyS1TnXHx5J4NzLs7y5DGR/EjDzrncIqW3paxCx0hamLR9Kuk6SW0kzZS0InhtnXTMeEmlkpZLGl5bmJ7gnHPhVA3VSmdLwcyWm9nJZnYy0BfYAUwHxgGzzKwEmBV8RlIvYBTQGxgBTJSU8iSe4Jxz4RUovS19w4B3zewDYCQwKSifBFwYvB8JTDWz3Wa2EigF+qeq1O/BOefCS7+ToZ2keUmf7zez+6vZbxQwJXjfwczWApjZWkntg/JOwBtJx5QFZTXyBOecCyfcYyIbzaxfyuqkhsCXgfFpnPlAluoAv0R1zoUkpPS2NJ0LLDCzdcHndZI6AgSv64PyMqBL0nGdgTWpKvYE55wLpaoBd4idqMku5bPLU4CngNHB+9HAk0nloyQVS+oOlABzU1Xsl6jOuXAEhYWZeQ5OUhPgHOCKpOJbgGmSxgCrgIsBzGyJpGnAUqAcuNrMKlLVX28T3NGdW/HQT87b9/nIji34xaQ3mL2wjNuvG0qzxg1Y9dGnjP3Fc2zdsQeA3t3bcccPhtK8SUPMjKFXTWX33pS/b6x8sm03P7x7Nv/84GMkuPW6s2lcXMSPf/Ua23eW07lDM+754WCaN2kYdah1bteeckZeP53deyuoqKjkgrN78KNvnsbP7n+NGW+8T4MGhXTr2IJ7bhhGy2bFUYd7yEJcfqZkZjuAtgeUbSLRq1rd/hOACenWn9UEJ2kEcDdQCDxgZrdk83xhlJZtYeCVkwEoKBBLp47hr7Pf5eGbzuen973K6+98yGUjevH9S07hfx5+g8ICcd/44Vx5y3Msfm8jrVs0Ym9FZcR/irr1s/veYHDfztx34zD27K1g5+5yLrvxWX7ynf4MOKEjf5rxT+57dBE3fLNv1KHWueIGhTz2y5E0a9yQveUVfOkHjzPs1CMZdEoXfjLmdIoKC7j5gde5e+p8/vM7Z0Qd7iHJo6Go2bsHFzyA92sSNxB7AZcGD+rlnEF9uvD+mk9YvX4rR3duxevvfAjAS/NX8aWzjwZgaL8jWfLeRha/txGAjz/dRWVlyg6cWNm6Yw9zF3/EqOE9AWjYoJCWzYp5r+wTTjv+cADO7nMEz7z2foRRRkcSzRonWq57yyvZW1GJgCH9ulIULJLc99jDWbNhW4RRZk6GOxmyJpudDP2BUjN7z8z2AFNJPKiXc74ypCePvbgcgH+8v4lzzzgKgJEDS+h0WHMAenRuhZnx6C0X8tJvLuWaS+pXK2XV2q20admI6+98lXO/N50f3vUqO3bt5ZhurZn5xioA/vrq
StZu3B5xpNGpqKhkyJVT6XXJQww6pQt9jzt8v++nPLeMYaceGVF0meUJLvEA3uqkz7U+lBeFBkUFnHv6UTzxcikA37vteb7z5RN5ceIomjVJXG4AFBUWMOD4Ixj7P89y7nV/5vyzejCwT5dUVcdKeUUli0s38Y3zjuVvv7qIxo2KmDjtHW697mwmPb2U8655gm0799KgqP52zBcWFvDivaN4e/K3eGv5epat3LTvuzsnz6OwUHx1WM8II8yQNHtQcyC/ZfUeXFoP5UkaC4wFoLhlFsOp3hf6d+PtFevZsGUHACtWf8y/jHsCgB6dWvHF07oBsGbDNl5750M2f7oLgJlz3uekksN45a3V1VUbOx3bNaVju6b0OTbxUPl5Z3XnN39+mxu+2ZdHJpwLwHtln/DCm/Xj90ilZbNizjjxCF6Yt4rjurdl6ox/MGPO+zz2vyNzolVzqIQoyFAvarZl85/btB7KM7P7zayfmfVTg6ZZDKd6Xx3Sk8de/Oe+z+1aNQYS//rccHl/fvf0IgBmzfuA3ke1o3FxEYUF4syTOrH8g811Hm9U2rdpQsfDmvJu2RYAXlu4hpKurdm4ZScAlZXGPVMXcvl5x0UYZXQ2btnJJ9t2A7BzdzmvvFVGSZfWvPDmB/xq2gL+8PPzadKoQcRRZk6+XKJmswX3JlASPJD3IYmxZl/P4vlCa1xcxOC+XfnBXS/sK/uXIcfwnZEnAvD07Hd55NmlQOIRiYmPLmDWr0eBGTPnvs+MOe9HEXZkbr7ydK755cvsLa+g6+HNue0HA3ls1gp+//QyAEac2Y1LzimJOMporNu8ne/fOouKSsMqjS8POpovDuhG/2/9gT17Krl4XOJZ1b7HHc5t1w6ONthDlSOXn+mQWfZ6AiWdB9xF4jGRh4JnWGpU0LyTFfe7ItUu9dqqZ8ZEHULOa1TUKOoQctqgAcNYMH/hIaWnBh1LrN237kpr349uuWB+bWNRsymrz8GZ2TPAM9k8h3OubiWeg8uPJly9HcngnDt4BXkyZbknOOdcOHl0D84TnHMuJCFvwTnn4iifxqJ6gnPOheadDM65ePJ7cM65OCsoyI8xx57gnHOhiLxZ2N4TnHMuJOG9qM65+MqXe3D5cSHtnMshmVs2UFIrSY9K+oekZZJOl9RG0kxJK4LX1kn7j5dUKmm5pOG11e8JzjkXSoaXDbwbeNbMjgVOApYB44BZZlYCzAo+Eyx5MAroDYwAJgZLI9TIE5xzLhwlelHT2VJWI7UABgIPApjZHjPbQmJpg0nBbpOAC4P3I4GpZrbbzFYCpSSWRqiRJzjnXGghWnDtJM1L2sYmVXMUsAH4naS3JD0gqSnQwczWAgSv7YP9Qy+D4J0MzrnQQvSibkwxH1wRcArwfTObI+lugsvRmk5bTVnKCS29BeecCyWD9+DKgDIzmxN8fpREwlsnqSNA8Lo+af9al0FI5gnOOReOoEBKa0vFzD4CVks6JigaBiwFngJGB2WjgSeD908BoyQVB0shlABzU53DL1GdcyEpkxNefh94RFJD4D3g2yQaXtMkjQFWARcDmNkSSdNIJMFy4Gozq0hVuSc451woInMjGcxsIVDdPbphNew/AUi5tksyT3DOuXDk0yU552IsT/KbJzjnXHjegnPOxZbPJuKciyUpo72oWeUJzjkXWm3PuOUKT3DOudDyJL95gnPOhSOf0dc5F2fei+qci608yW+e4JxzIUkUFObHPB2e4JxzoVRNl5QPcirBndTzMF6ecVXUYeSsBRsXRx1Czju13YlRh1Av+D0451xseYJzzsWTfGV751xMCbyTwTkXX3lyheoJzjkXUpqr1ueC/GhnOudyigqU1lZrPdL7khZJWihpXlDWRtJMSSuC19ZJ+4+XVCppuaThtdXvCc45F0oGlw2sMsTMTk5aP3UcMMvMSoBZwWck9QJGAb2BEcBESYWpKvYE55wLTcFlam3bQRoJTAreTwIuTCqfama7zWwlUAr0
T1WRJzjnXDiCwgKltQHtJM1L2sYeUJsBMyTNT/qug5mtBQhe2wflnYDVSceWBWU18k4G51woiUtUS3f3jUmXntU508zWSGoPzJT0j1pOfaCUgXgLzjkXWqbuwZnZmuB1PTCdxCXnOkkdE+dRR2B9sHsZ0CXp8M7AmlT1e4JzzoVWIEtrS0VSU0nNq94DXwQWA08Bo4PdRgNPBu+fAkZJKpbUHSgB5qY6h1+iOudCEdVfKx6EDsD0oDOiCJhsZs9KehOYJmkMsAq4GMDMlkiaBiwFyoGrzawi1QlqTHCS/o8U17dmdk3IP4xzLg4EhQVp34OrkZm9B5xUTfkmYFgNx0wAJqR7jlQtuHnpVuKcq1/yZCBDzQnOzCYlf5bU1My2Zz8k51wuE7XfX8sVtXYySDpd0lJgWfD5JEkTsx6Zcy5nKc0taun0ot4FDAc2AZjZ28DALMbknMtxmehFrQtp9aKa2eoDhl2k7LlwzsVXyHGmkUonwa2WdAZgkhoC1xBcrjrn6qfCHGidpSOdS9QrgatJjPn6EDg5+Oycq6ckS2uLWq0tODPbCFxWB7E45/KAyJ81GdLpRT1K0l8kbZC0XtKTko6qi+Ccc7kpX1pw6VyiTgamAR2BI4A/A1OyGZRzLocFq2qls0UtnQQnM/uDmZUH2x+pZYoS51x8CUt7i1qqsahtgrcvShoHTCWR2L4G/LUOYnPO5ahMjEWtC6k6GeaTSGhVDc0rkr4z4L+yFZRzLrfl/XNwZta9LgNxzuWHRC9q/rfg9pF0PNALaFRVZma/z1ZQzrnclicNuNoTnKSbgMEkEtwzwLnAbMATnHP1UR4N1UqnF/WrJCaf+8jMvk1igrrirEblnMtZItHJkM4WtXQuUXeaWaWkckktSCwAEasHfXftKWfk9U+wZ28FFRWVXHB2D374zf7c+vu5/PFvy2jbMnFl/uN/HcAX+h8ZcbR1Y8OmXdz2m+V8vGUPkjh3aEcuPPezFdoefXo1D05eydR7T6dliwYsWPQxv5uykvKKSooKCxhzWXdO7t06xRni5cP1W7nq1udZv3kHBQXim+f15oqLTmLMhGd5d/UWAD7ZvpuWTYt56d5R0QabAQU58AhIOtJJcPMktQJ+S6JndRu1LPQAIOkh4AJgvZkdfyhBZltxg0Ie/+VImjZuwN7yCr70g+kMPbUrAFd85USuurhPxBHWvcIC8d3LjuLo7s3ZsbOca258iz4ntOLIzk3ZsGkXby3aQvt2nzXkWzRvwM/+ozdtWxfz/urt/OSWRfzx1wMi/BPUrcLCAm4eeyYnlbRn6449DLv6Tww+pQsP3jhi3z4/vW82LZo2jDDKzInNJaqZXWVmW8zsXuAcYHRwqVqbh4ERte2UCyTRtHEDAPaWV1JeUYny5jZqdrRpXczR3ZsD0KRxEV06NWHTx3sAuO8P7zHm6/t3sh/drRltWycS3pGdm7BnbyV79lbWbdAROrxtU04qSaxP3LxJQ3p2bcPajdv2fW9mPPlyKV8Z0jOqEDNGpDdMK92hWpIKJb0l6engcxtJMyWtCF5bJ+07XlKppOWShtdWd40JTtIpB25AG6AoeJ+Smb0CbE7rT5gDKioqGXrln+h9ye8YdEoX+h7XAYCHnlrM4Cumcu3tL7Bl666Io4zGug27ePf9bRzTozlvzN9Eu9YNOerIZjXuP3vuRnoc2YyGDernqpSrPvqURaUb6Hvs4fvK/r5oDYe1bkyPTq2iCyxTMj9U61r2n4JtHDDLzEqAWcFnJPUCRgG9STSeJkoqTFVxqkvU21N8Z8DQ2uOunaSxwFiALl07Z6LKg1JYWMAL936NT7bt5ls//xvLVm5i9JeO598v64ckbpk0h5vuf527r8/IHztv7NxVwX/fuZQrvtGDwkIx9YlVTBh/Qo37f1C2nYemrEy5T5xt27mHb938Nyb829k0T7ocffylFbFovVXJ1EB6SZ2B80mslPXvQfFIEk9uAEwCXgJ+FJRPNbPd
wEpJpSQWiv57TfXX+E+smQ1JsWXs/3Izu9/M+plZv7bt2maq2oPWslkxZ57YiRfnraJ96yYUFhZQUCAuP7cXb/1jfe0VxEh5eSX/fedShpzZnjP7t2Ptul18tGEXV42bz+hr5rBx826+f+MCNm9JXLpu2LSb/7pjKTf82zEc0aFxxNHXvb3lFXz75r/x1aE9ueCsHvvKyysq+evsd7loUEmE0WWOSEx4mc4GtJM0L2kbe0B1dwE/BJLvZ3Qws7UAwWv7oLwTsDppv7KgrEa+8DOwcctOGhQV0LJZMTt3l/PKW2V875I+rNu0nQ5tmwLwzGsrObZbm1pqig8z4677/0mXTk34yvmJlnX3rk2Zeu/p+/YZfc0c7vnvU2jZogHbtpdz062L+daobvQ+pmVUYUfGzLj2jhfo2bUNV311/06plxes5ugurTnisJov6/NNiJsPG82sX3VfSKrqhJwvaXAadVV30ZuyKekJDli3eTvX3PoCFZWVVFbCyEE9+OKAblz9v8+z+N2NSNClQwtuu3ZQ1KHWmSXLP2XW7PV069KUq8fPB2D0Jd3p36f6JP+XGR+yZt1OpkxfxZTpqwCYMO4EWrWMR69hbeYsWcu055fTq3tbBl85FYAb/3UA5/TvxvSYXZ5Cxi5RzwS+LOk8EqOkWkj6I7BOUkczWyupI4lH0yDRYuuSdHxnYE3KOM2y8zyLpCkkrqPbAeuAm8zswVTH9Ol7sr38xvNZiScOFmxcHHUIOe/UdidGHUJOO3vAUBbMX3hIjwh06nWsXfHIb9Pa96ZTBs6vqQWXLGjB3WBmF0i6FdhkZrcEMxm1MbMfSupNYn7K/iTmppwFlJhZjYtgpTNUSySmLD/KzG6W1BU43MxSPgtnZpfWVrdzLj9lebD9LcA0SWOAVcDFAGa2RNI0YClQDlydKrlBepeoE0ncABwK3AxsBR4DTj3o8J1zeS3TT4ma2Uskeksxs00khodWt98EEj2uaUknwZ1mZqdIeis4wcfB8oHOuXpIyo1xpulIJ8HtDR6mMwBJh7F/l65zrp7Jl3E+6SS4e4DpQHtJE0jMLvKTrEblnMtpsZnw0swekTSfxDWxgAvNzFe2d66eEjFqwQW9pjuAvySXmdmqbAbmnMtdsWnBkVhBq2rxmUZAd2A5iQGvzrl6KDYJzsz2GzUdzCRyRQ27O+diToQaqhWp0EO1zGyBJH8Gzrn6SpmbTSTb0rkH9+9JHwuAU4ANWYvIOZfz4tSCa570vpzEPbnHshOOcy7XVc3omw9SJrjgAd9mZvYfdRSPcy4P5H0LTlKRmZWnMz25c65+iUMv6lwS99sWSnoK+DOwvepLM3s8y7E553KQiEeCq9IG2ERiNpGq5+EM8ATnXD2VL8sGpkpw7YMe1MV8ltiq5Ef6ds5lRRwWfi4EmnEQ86A75+JLikcLbq2Z3VxnkTjn8kae5LeUCS5f/gzOuTpUtWxgPkiV4KqdMtg5V99Z3vSiplr4eXNdBuKcyx9Kc0tZh9RI0lxJb0taIunnQXkbSTMlrQheWycdM15SqaTlkobXFme+PJDsnMshkqW11WI3MNTMTgJOBkZIGgCMA2aZWQmJpQHHJc6pXsAoElO1jQAmBqOtauQJzjkXStV0SelsqVjCtuBjg2AzYCQwKSifBFwYvB8JTDWz3Wa2EiglsUZqjTzBOedCK5DS2oB2kuYlbWOT65FUKGkhidXrZ5rZHKCDma0FCF7bB7t3AlYnHV4WlNUo9Hxwzrl6TkLpPwi3MdXK9sHCzSdLagVMl3R8qjNXV0Wqk3sLzjkXSrodDGGeMzOzLSQWfh4BrJPUESB4XR/sVgZ0STqsM7AmVb2e4JxzoSnN/1LWIR0WtNyQ1Bj4AvAP4ClgdLDbaODJ4P1TwChJxZK6AyUkJgWpkV+iOudCy9BQrY7ApKAntACYZmZPS/o7ME3SGGAVcDGAmS2RNA1YSmLy3auDS9waeYJzzoVWkIGBTmb2DtCnmvJN
1DDQwMwmABPSPYcnOOdcKIn54PJjJGdOJbgCiYYFDaIOI2cNaP+5f+zcAUo/eTfqEHLa7oo9GaknT/JbbiU451x+qK0DIVd4gnPOheYtOOdcLKXzCEiu8ATnnAtHUJgnTThPcM650PIjvXmCc86FJAgzFjVSnuCcc6HlR3rzBOecOwjegnPOxVZ+pDdPcM65kBKrauVHivME55wLyZ+Dc87FWJ404DzBOefC8xaccy6WJG/BOedizFtwzrnYypcJL33RGedcKJla+FlSF0kvSlomaYmka4PyNpJmSloRvLZOOma8pFJJyyUNry1WT3DOudAUrI1a21aLcuB6MzsOGABcLakXMA6YZWYlwKzgM8F3o4DeJJYXnBgsWFMjT3DOuZAyszKqma01swXB+63AMhIr1Y8EJgW7TQIuDN6PBKaa2W4zWwmUAv1TncMTnHMutEwv/CypG4kVtuYAHcxsLSSSINA+2K0TsDrpsLKgrEbeyeCcC01Ku23UTtK8pM/3m9n9+9elZsBjwHVm9mmKS9vqvrBUJ/cE55wLLUTrbKOZ9auxHqkBieT2iJk9HhSvk9TRzNZK6gisD8rLgC5Jh3cG1qQ6uV+iOudCSVx+pvdfynoSTbUHgWVmdkfSV08Bo4P3o4Enk8pHSSqW1B0oAeamOoe34Jxz4WXmObgzgW8AiyQtDMp+DNwCTJM0BlgFXAxgZkskTQOWkuiBvdrMKlKdwBOccy60TKQ3M5udoqphNRwzAZiQ7jk8wTnnQgrbRxodT3DOuXCUP0O1PME55w6CJzjnXEz5bCLOuVjKnztwnuCccwfD78Hll+/dOpPn5qykXasm/P2BywH46X2v8twbK2lQVED3I1rx6/84h5bNiiOONBrV/T5V/m/afP7z/tmUPjaWti0bRxRh3bv5128ze946Wrcs5k93DQJg/O3z+WDNdgC2bd9Ls6YNmHz7QJas+JgJ9y5KHGjGd7/WkyGndYwq9EOUP4vOZG0kQ01zPeWqS4f34tFfXLhf2ZC+XXn9gct57beX06NzK+6Y8mY0weWA6n4fgLL1W3lp/io6t29e90FF7ILBnbnnp6ftV/aL6/sy+faBTL59IEMGdGTIaYcD0KNrC37/y7OYfPtA7vnpafzi3kWUV1RGEXZGZGIkQ13I5lCtmuZ6yklnntiJ1s0b7Vc2tN+RFBUmfqJTjzucNRu2RRFaTqju9wG48Tev8LOxZ+XLFUtGndK7LS2aNaj2OzPj+dfXMPysIwBoVFy47+/S7j2Vef97ZWg+uKzL2iVqMM1J1ZQnWyVVzfW0NFvnzKY/PruUiwb3jDqMnPLM6+/RsV0zTuhxWNSh5Jy3lm6mbatiuh7RbF/Z4n9+zM2/fpuPNu7k59ecvC/h5afok1c66uQXPmCup7xz2yNzKSos4JJhx0QdSs7YsWsvd0yey/jRA6IOJSfNmL2GLwattyrH92zNtLsHM+l/z+Lhx0vZvSflMMqclun54LIl6wnuwLmeqvl+rKR5kuZt2LAp2+GENmXGUma8sZL7xw/PiSZ3rli55hM++OhTzr7iEU687CHWbNjGoCsns27z9qhDi1x5RSUvzlnLOWceUe333Ts3p3FxEe+u2lrHkWVGpmYTqQtZ7UWtYa6n/QST390P0Ldfn5ST19W15+e+z91T5/P0Hf9Ck0bV32upr3of1Y4Vj47d9/nEyx7ixYmX1qte1JrMfWcjR3ZqRoe2n/0WH67bQYd2jSgqLGDt+h18sGYbR7RvEmGUhyI37q+lI2sJLsVcTzlpzIS/8drbZWz6ZBe9Rz3IuNGnceeUeezeW8FFP5oOQL/jDufO66qd5CD2qvt9vnHu8VGHFakb71jA/CWb2LJ1D+d/93nGfq0nI7/QlRmz1zD8rP1n0n572WYenl5KUVEBBYIfffcEWrVoGFHkhy4XWmfpkFl2Gk2SzgJeBRYBVf3hPzazZ2o6pm+/PvbanJeyEo+rH0o/eTfqEHLa14ZezpKF
Sw8pO53Q53h74uVH09r36JbHzU81o2+2ZbMXNdVcT865fKW8GcjgIxmccwcjPzKcJzjnXGj5cg/OE5xzLhTlUS9qPj9K7ZyLSKaeg5P0kKT1khYnlbWRNFPSiuC1ddJ34yWVSlouaXht9XuCc86FlsEHfR8GRhxQNg6YZWYlwKzgM8FY9lFA7+CYiZIKU1XuCc45F16GxmqZ2SvA5gOKRwKTgveTgAuTyqea2W4zWwmUAv1T1e8JzjkXWogWXLuqoZjBNra2uoEOwWQdVZN2tA/KOwGrk/YrC8pq5J0MzrnQQvSibszgg77VnTTlSAVvwTnnQqnqRc3ifHDrJHUECF7XB+VlQJek/ToDa1JV5AnOORdalmcTeQoYHbwfDTyZVD5KUrGk7kAJMDdVRX6J6pwLLVNPwUmaAgwmca+uDLgJuAWYJmkMsAq4GMDMlkiaRmLS3HLgajNLOameJzjnXHgZetDXzC6t4atqp+0xswnAhHTr9wTnnAvNh2o552JJQIEnOOdcLOXKggtp8ATnnAspN9ZbSIcnOOdcaPmS4Pw5OOdcbHkLzjkXWr7MB+cJzjkXiveiOufizVtwzrl48l5U51yM5Ud68wTnnDsI3oJzzsWX34NzzsWR96I65+LNW3DOubjKj/TmCc45F5o/JuKcizFPcM65WJLyZyyqzFIuK1inJG0APog6jiTtgI1RB5HD/PepXa79Rkea2WGHUoGkZ0n8udKx0cxGHMr5DkVOJbhcI2leBhetjR3/fWrnv1G0fD4451xseYJzzsWWJ7jU7o86gBznv0/t/DeKkN+Dc87FlrfgnHOx5QnOORdbnuCqIWmEpOWSSiWNizqeXCPpIUnrJS2OOpZcJKmLpBclLZO0RNK1UcdUX/k9uANIKgT+CZwDlAFvApea2dJIA8shkgYC24Dfm9nxUceTayR1BDqa2QJJzYH5wIX+d6jueQvu8/oDpWb2npntAaYCIyOOKaeY2SvA5qjjyFVmttbMFgTvtwLLgE7RRlU/eYL7vE7A6qTPZfhfTneQJHUD+gBzIg6lXvIE93nVjSL263gXmqRmwGPAdWb2adTx1Eee4D6vDOiS9LkzsCaiWFyektSARHJ7xMwejzqe+soT3Oe9CZRI6i6pITAKeCrimFweUWIuoQeBZWZ2R9Tx1Gee4A5gZuXA94DnSNwcnmZmS6KNKrdImgL8HThGUpmkMVHHlGPOBL4BDJW0MNjOizqo+sgfE3HOxZa34JxzseUJzjkXW57gnHOx5QnOORdbnuCcc7HlCS6PSKoIHjlYLOnPkpocQl0PS/pq8P4BSb1S7DtY0hkHcY73JX1u9aWayg/YZ1vIc/1M0g1hY3Tx5gkuv+w0s5ODGTz2AFcmfxnMhBKamX2nlpkuBgOhE5xzUfMEl79eBY4OWlcvSpoMLJJUKOlWSW9KekfSFZB4ul7SryQtlfRXoH1VRZJektQveD9C0gJJb0uaFQwWvxL4QdB6PFvSYZIeC87xpqQzg2PbSpoh6S1J91H9uN79SHpC0vxg3rSxB3x3exDLLEmHBWU9JD0bHPOqpGMz8mu6WPKV7fOQpCLgXODZoKg/cLyZrQySxCdmdqqkYuA1STNIzGhxDHAC0AFYCjx0QL2HAb8FBgZ1tTGzzZLuBbaZ2W3BfpOBO81stqSuJEZ9HAfcBMw2s5slnQ/sl7Bq8K/BORoDb0p6zMw2AU2BBWZ2vaT/DOr+HolFXK40sxWSTgMmAkMP4md09YAnuPzSWNLC4P2rJMY7ngHMNbOVQfkXgROr7q8BLYESYCAwxcwqgDWSXqim/gHAK1V1mVlNc759AeiVGHIJQItgYseBwFeCY/8q6eM0/kzXSLooeN8liHUTUAn8KSj/I/B4MDvHGcCfk85dnMY5XD3lCS6/7DSzk5MLgv/RtycXAd83s+cO2O88ap/2SWnsA4lbG6eb2c5qYkl77J+kwSSS5elmtkPSS0CjGna34LxbDvwNnKuJ34OL
n+eAfwum60FST0lNgVeAUcE9uo7AkGqO/TswSFL34Ng2QflWoHnSfjNIXC4S7Hdy8PYV4LKg7FygdS2xtgQ+DpLbsSRakFUKgKpW6NdJXPp+CqyUdHFwDkk6qZZzuHrME1z8PEDi/toCJRaFuY9ES306sAJYBPwGePnAA81sA4n7Zo9LepvPLhH/AlxU1ckAXAP0CzoxlvJZb+7PgYGSFpC4VF5VS6zPAkWS3gH+C3gj6bvtQG9J80ncY7s5KL8MGBPEtwSfTt6l4LOJOOdiy1twzrnY8gTnnIstT3DOudjyBOeciy1PcM652PIE55yLLU9wzrnY+n/MommmLyZfLAAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<Figure size 432x288 with 2 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "# 参数调优\n",
    "#\n",
    "# DecisionTreeClassifier 参数如下\n",
    "#\n",
    "# DecisionTreeClassifier(class_weight=None, criterion='entropy', max_depth=None,\n",
    "#             max_features=None, max_leaf_nodes=None,\n",
    "#             min_impurity_decrease=0.0, min_impurity_split=None,\n",
    "#             min_samples_leaf=1, min_samples_split=2,\n",
    "#             min_weight_fraction_leaf=0.0, presort=False, random_state=None,\n",
    "#             splitter='best')\n",
    "#\n",
    "# - class_weight：类别权重，默认为None。可选值还有：dict,balanced。dict：指定样本各类别的权重，权重大的类别在决策树构造的时候会进行偏倚；balanced：算法自己计算权重，样本量少的类别所对应的样本权重会更高。\n",
    "# - criterion：在基于特征划分数据集合时，选择特征的标准。默认是gini,也可以是entropy。\n",
    "# - max_depth：决策树的最大深度，我们可以控制决策树的深度来防止决策树过拟合\n",
    "# - max_features：在划分数据集时考虑的最多的特征值数量。为int或float类型。其中int值是每次split时最大特征数；float值是百分数，即特征数=max_features*n_features\n",
    "# - max_leaf_nodes：最大叶子节点数。int类型，默认为None。默认情况下是不设置最大叶子节点数，特征不多时，不用设置。特征多时，可以通过设置最大叶子节点数，防止过拟合。\n",
    "# - min_impurity_decrease：节点划分所需的最小不纯度减少量。float类型，默认值为0。只有当一次划分带来的不纯度减少量大于或等于这个阈值时，节点才会被划分。通过设置，可以限制决策树的增长。\n",
    "# - min_impurity_split：提前停止生长的不纯度阈值。节点的不纯度高于这个阈值才会继续分裂，否则成为叶子节点（该参数自0.19版起已弃用，并在scikit-learn 1.0中移除，建议改用min_impurity_decrease）\n",
    "# - min_samples_leaf：叶子结点需要的最小样本数。如果其叶子结点数小于这个阈值，则会和兄弟节点一起被剪枝。min_samples_leaf的取值可以是int或float类型\n",
    "# - int类型：最小样本数量\n",
    "# - float类型：表示一个百分比。即：最小样本数=min_samples_leaf * 样本数量n，并向上取整。\n",
    "# - min_samples_split：当节点的样本数少于min_samples_split时，不再继续分裂。默认值为2.\n",
    "# - min_weight_fraction_leaf：默认为0.0\n",
    "# - presort：bool类型，默认为False。表示在拟合前，是否对数据进行排序来加快树的构建。当数据集较小时，使用presort=true会加快分类器构建速度。当数据集较为庞大时，presort=true又会使得树的构建十分缓慢\n",
    "# - random_state\n",
    "# - splitter：在构造树时，选择属性特征的原则，可以是best或random。默认是best,best代表在所有的特征中选择最好的，random代表在部分特征中选择最好的。\n",
    "#\n",
    "# 版权声明：本文为CSDN博主「Andrewings」的原创文章，遵循CC 4.0 BY-SA版权协议，转载请附上原文出处链接及本声明。\n",
    "# 原文链接：https://blog.csdn.net/shichensuyu/article/details/96478729\n",
    "\n",
    "# In[15]:\n",
    "\n",
    "\n",
    "from sklearn.model_selection import GridSearchCV\n",
    "\n",
    "# Search grid for a single decision tree: depths 5..19 under both split criteria,\n",
    "# with balanced class weights and a fixed seed. The candidates are plain lists so\n",
    "# that best_params_ holds ordinary Python strings rather than NumPy string scalars.\n",
    "params = {\n",
    "    'max_depth': range(5, 20),\n",
    "    'criterion': ['entropy', 'gini'],\n",
    "    \"class_weight\": ['balanced'],\n",
    "    \"random_state\": [2021],\n",
    "}\n",
    "# 8-fold cross-validation scored by macro-averaged F1, using 5 parallel jobs\n",
    "clf = GridSearchCV(estimator=tree.DecisionTreeClassifier(), param_grid=params, cv=8, n_jobs=5, scoring=\"f1_macro\")\n",
    "clf.fit(X_train,y_train)\n",
    "print(\"Best set score:{:.2f}\".format(clf.best_score_))\n",
    "print(\"Best parameters:{}\".format(clf.best_params_))\n",
    "# GridSearchCV.score applies the configured scoring, so this is macro F1 on the test split\n",
    "print(\"Test set score:{:.2f}\".format(clf.score(X_test, y_test)))\n",
    "# NOTE(review): eval is called with five arguments, so it is presumably the notebook's\n",
    "# own evaluation helper shadowing the builtin - confirm against its definition\n",
    "eval(clf,X_train, X_test, y_train, y_test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {
    "pycharm": {
     "is_executing": true
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Best set score:0.86\n",
      "Best parameters:{'class_weight': 'balanced', 'criterion': 'entropy', 'max_depth': 19, 'n_estimators': 200, 'random_state': 2021}\n",
      "Test set score:0.88\n",
      "训练集准确率 1.0\n",
      "训练集f1_score 1.0\n",
      "测试集准确率 0.91\n",
      "测试集f1_score 0.883161758171133\n"
     ]
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAATgAAAEGCAYAAADxD4m3AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8QVMy6AAAACXBIWXMAAAsTAAALEwEAmpwYAAAgfElEQVR4nO3dd5xU9dXH8c+ZZXfpHREFBKQpJBSxgRLQmCBR0URj11iCmBhrEjVPnhCNJiYao9FogmLUx4K9RQSJSlCDihSVIoqCSJHeQbad54+5i8PKzs7AzN6Zu983r/ti5s4tZ+e1HH73d+/vd8zdERGJoljYAYiIZIsSnIhElhKciESWEpyIRJYSnIhEVr2wA0hkhY3c6jcPO4yc1ad7m7BDyHmGhR1CTlv82WJWr16zR19SQctu7qVbU9rWNy+b6O7D9uR8eyK3Elz95hQPuCjsMHLW5IkXhx1CziuKFYYdQk4bdOjQPT6Gl25N+d/pl5NHt97jE+6BnEpwIpIHDLD8aCkrwYlI+goKwo4gJUpwIpImUwtORCLKAMuPBzCU4EQkfTG14EQkqnSJKiLRZLpEFZGIMqAgPxJcfkQpIjkkaMGlstR0JLMrzGyOmc02s0fNrL6ZtTSzSWb2cfB3i4TtrzWzBWY238y+W9PxleBEJH1mqS1JD2H7ApcCA9y9N1AAnAZcA7zi7t2AV4L3mNmBwee9gGHAXWaW9IE8JTgRSU/lYyIZaMER7yZrYGb1gIbAMmAE8EDw+QPAicHrEcA4d9/u7guBBcAhyQ6uBCci6YtZagu0NrN3E5aRlYdw96XALcBiYDmwwd1fBtq6+/Jgm+XAXsEu+wKfJ0SxJFhXLd1kEJH0mEEs5aFaq919wK4PYy2It8o6A+uBJ8zsrGRn3sW6pEVl1IITkfRloA8O+Daw0N1XuXsp8DQwEFhhZu3ip7F2wMpg+yVAh4T92xO/pK2WEpyIpC/1S9RkFgOHmVlDMzPgaGAe8DxwbrDNucBzwevngdPMrNjMOgPdgHeSnUCXqCKSpsw86Ovub5vZk8AMoAyYCYwBGgOPm9kFxJPgKcH2c8zscWBusP1P3b082TmU4EQkPRmcD87dRwOjq6zeTrw1t6vtbwRuTPX4SnAikj6NRRWRaDJNeCkiEaUpy0UkujSbiIhEmSa8FJHI0iWqiERSekO1QqUEJyLp0yWqiESWbjKISCTpMRERiS7DlOBEJIryqAGnBCciaTIoKMiPDFenE9zFP+jH2cf2AnfmLlzDT2+exOWnD+Cc4b1Zs34bAL+7779MemcRLZrW54HfDKdfj7Y8OnEev7xzcrjB17IvS8o48arnKCktp6y8guOO7MIvzzmEkTe+zCdL1gOwYUsJzRoV8crdPww32Bzw96dn8eBLs3GHc4b34uLv9ws7pIzSJSpgZsOA24lXy7nX3W/K5vnS0a5VIy46sQ+HXfB/fFlSzn3/eyzfH9odgLufmsmdT8zYafvtJWX8/v63OKBTKw7o1CqMkENVXFjAU386gUYNCiktK+eEK5/l6IM7MuZ/vrNjm9H/+C9NGxWFGGVumLtwDQ++NJt/33EqRYUFnHLtc3znkM7s37552KFlRD5dombtXm9QzutvwLHAgcDpQdmvnFGvIEb94noUxIyGxYV8sWZLtdtu/bKMt2Yv48uSslqMMHeYGY0aFAJQWlZBWXnFTv+LuzsvTFnASUO7hhVizvho8VoG9NybhvULqVcQY+A39+XFNz8JO6yMMrOUlrBl82GWQ4AF7v6pu5cA44gXmMgJy9ds4Y4nZvDBI+fz4eMXsnHLdl6bvhiAH4/owxtjzuSOn3+bZo2LQ440d5SXV3D0xY/T+9T7GdyvPf17tt3x2Vuzl9O6RUO67Ns8vABzxAGdWjH1g2Ws3biNrV+WMumdRSxdtSnssDJKCW43SnzVpmaNixk+sAt9z7qfA04dS8P6hfzw6B7c9/wH9Dvnfo68
6GFWrNnCDaOODDvUnFFQEOOVu3/IzIfPYeb8lcxbtGbHZ8+89jEnDVHrDaDHfi259NSD+P7Vz3LKr56jd5fWFBTkx4OxKUmx3kxN+c3MepjZrIRlo5ldni+V7VMq8WVmIytrJnpp9ZeImTakfwc++2IjazZso6y8ghfeWMAhvfZh1fqtVFQ47vDA+Nkc1KNtzQerY5o1LmZgn314bVr8/6+y8grGv7mQEd9Sgqt09rG9mHz36bx468m0aFKf/SPUsjWMWEFqSzLuPt/d+7p7X+AgYCvwDHlS2T6lEl/uPsbdB7j7ACtslMVwqgS3chMDDtibBsXx+yzf6teB+YvX0rZlwx3bHHdE151aKXXZ6vXb2LB5OwDbtpfx+owldO3QHIApwet92jQOMcLcsmrdViD+e/avNz/hB8ENrKjIwiXq0cAn7v4ZGaxsn827qNOAbkF5r6XEM+8ZWTxfWqZ/uILnpyxg8t2nU15ewfsLVvHAi7P565VH842ubXCHxV9s5IrbXtmxz3sPnUeThkUUFsYYPqgLP7j6WeYvXhviT1F7Vq7dyqW3vEp5RQUVFc4Jg7vyncM6AfDsfxZw0pBu4QaYY869fjxrN26jsF4Bf7pkCM2b1A87pMxJqeTpDq3N7N2E92PcfcwutjsNeDR4vVNlezNLrGz/VsI+NXZ7mXvSwtB7xMyGA7cRf0zkvqAiTrViTfb14gEXZS2efPfFxIvDDiHnFcUKww4hpw06dCgzps/co97/wnbdvPWPbktp2y9uOm56dZXtK5lZEfGru17uvsLM1rt784TP17l7CzP7GzDV3R8K1o8Fxrv7U9UdO6vPwbn7eGB8Ns8hIrUr/hxcRu+QHgvMcPcVwfsVZtYuaL2psr2I1K5YzFJaUnQ6X12egirbi0ho0uuDS34os4bAMUBi39RNqLK9iITDsAzN6OvuW4FWVdatQZXtRSQM+TQWVQlORNKWC8OwUqEEJyLpyWAfXLYpwYlI2mKx/HgAQwlORNJi5E3VQCU4EUmTkbG7qNmmBCciaVMfnIhEVG5MZpkKJTgRSYuegxOR6DLdRRWRCFMLTkQiS3dRRSSS1AcnItFlEMuTDKcEJyJpSmsyy1ApwYlIWgz1wYlIVFn+TJeUHw+ziEhOyURl+/hxrLmZPWlmH5rZPDM7PF8q24tIRGWw8PPtwAR37wn0AeaRJ5XtRSSiLGYpLUmPYdYUGAyMBXD3EndfTwYr2yvBiUhazFIrGRjcaW1tZu8mLCMTDtUFWAX808xmmtm9ZtaIKpXtgcTK9p8n7F9jZXvdZBCRtKXxHNzqJJXt6wH9gZ+5+9tmdjvB5Wg1dnVST3ZyteBEJG0ZusmwBFji7m8H758knvBWBBXtUWV7EalVZpnpg3P3L4DPzaxHsOpo4kWdVdleRMKTwefgfgY8bGZFwKfAecQbXqpsLyLhyFR+c/dZwK766FTZXkRCYEasID96t5TgRCQtmi5pN/Xp3obXJo4KO4ycNX317LBDyHkD2/YPO4SclqnElC9jUXMqwYlIflCCE5FoMlW2F5GIMtBNBhGJrjy5QlWCE5E0pT4VUuiU4EQkbZqyXEQiSc/BiUik6RJVRKLJoECXqCISRfFL1KTzTOYMJTgRSVueXKEqwYlI+mJqwYlIFBm7Lo6Qi6pNcGZ2B0kKOrj7pVmJSERym0FBLDMtODNbBGwCyoEydx9gZi2Bx4BOwCLgh+6+Ltj+WuCCYPtL3X1isuMna8G9u6fBi0g0ZbgPbqi7r054X1n4+SYzuyZ4f3WVws/7AP82s+7Jpi2vNsG5+wOJ782skbtv2ZOfQkTyn+HZ7oMbAQwJXj8ATAauJqHwM7DQzCoLP0+t7kA1TglgZoeb2VxgXvC+j5ndtSfRi0h+sxQXkhd+hng32MtmNj3hs1ot/Hwb8F3iJbtw9/fMbHAK+4lIRKXRgktW+BlgkLsvM7O9gElm9mGSbbNT+NndP6+yKmmpLhGJrlSLPqfST+fu
y4K/VwLPEL/krNXCz5+b2UDAzazIzH5OcLkqInVTgXlKSzJm1sjMmlS+Br4DzKaWCz+PAm4nfq27FJgI/DSF/UQkojI0VKst8EwwcL8e8Ii7TzCzadRW4efg9u2Ze/RjiEhkGJmpyeDunwJ9drF+DRkq/JzKXdQuZvaCma0ys5Vm9pyZdUn1BCISPWae0hK2VPrgHgEeB9oRf7juCeDRbAYlIjksqKqVyhK2VBKcufv/uXtZsDxEDbdmRSS6DE95CVuysagtg5evBcMlxhFPbKcCL9ZCbCKSozI1FjXbkt1kmE48oVU2NC9K+MyB32UrKBHJbXk/H5y7d67NQEQkP8TvouZ/C24HM+sNHAjUr1zn7g9mKygRyW150oCrOcGZ2WjiI/sPBMYDxwJvAEpwInVRisOwckEqd1FPJv7Q3Rfufh7xB/OKsxqViOQsI36TIZUlbKlcom5z9wozKzOzpsQHvkbqQd8vS8o46arnKCmtoKy8guOO7MIvzjmY2Z+s5uq/TmF7STkFBTFuuuQI+vVsG3a4tWLVmu38+e75rNtQQsyMYUftzYhh8Zlpnp+4lH9NWk5BzDi4b0vOP6MzMz9Yxz/HLaKsrIJ69WJccEZn+vRqHu4PEbLy8gqG/mQc7Vo34rEbR4QdTkbFcuARkFSkkuDeNbPmwD3E76xupoYBrgBmdh9wHLDS3XvvSZDZVlxYwJN/OoFGDQopLStnxJXPcdTBHfnTg9O48qwBHH1wR1555zN+N/Ytnr45Wr+o1SmIGRee2YWunRuzdVsZl/16Fv16N2fdhlLemr6Wv/2hP4WFMdZvKAGgaZNCRv/8QFq1KGbR51v4zR9n8+Cdh4b8U4Tr78/MonvHFmzaWhJ2KBkXmUtUd/+Ju693978DxwDnBpeqNbkfGLaH8dUKM6NRg0IASssqKC2v2DHdy+Yt8V/OjVtK2LtlozDDrFUtWxTRtXNjABo2qEeHfRqwZl0J419ZzikntKewMP6r07xZEQD7d2pMqxbxnov92jekpLSC0tKKcILPAUtXbeLltxdyzvCc/r99txipDdPKhaFayR707Z/sM3efkezA7j7FzDrtQWy1qry8gu9e8hQLl23gvON7079nW64fNYjTf/Ui198zlQp3nv/LSWGHGYoVq77k08+20GP/Jox9ZCFzPtzIg49/RlGhccEZXei+f5Odtn/zndV02a/xjiRYF/3qrilc9+Mj2Ly1NOxQMi9HhmGlItkl6p+TfObAUZkIIJimeCRA+47tM3HI3VJQEOPfd5/Chs3bOf+6iXy4aC0PjZ/LdRcN5Lgju/D8fxZw1a2TefyPx4cWYxi2fVnOjbfN48dnd6Fhw3pUVDibt5Rx63V9+OjTzdx0xzzG/uVggilv+GzJFv45bhE3XBO9lkuqJrz1Ka2bN6Bv97a8MWtJ2OFkRS60zlKR7EHfobURgLuPAcYA9Duob+jfWrPGxQzssw+vTVvM45M+4ncXDwLg+MH7c9Vt/wk5utpVVlbB72+by9BBbRh0cGsAWrUsZuDBrTAzeuzfBDNj46ZSmjUtYvWa7dzwl3lcNao77do2CDn68Lw9ezkTpi5k0jv3sb2knE1bSxj5hwmMuTYvemxqZFDjZJa5ou5eQyRYvX4bGzZvB2Db9jKmzFhC1w4taNuqIVPfj8+I/MaspXTep1mYYdYqd+f2ez6mw74NOWn4Vy3rww9qxXtz1wOwdPlWysoqaNqkkM1byvjtLXP40amdOLBH3fmedmX0hYOYM+4C3n/4fMb+z7Ec2bd9ZJJbpViKS9hU2R5YuXYrl93yKuUVTkWFc8Lg/TnmsP1o2riI/737TcrLneKiAm6+/Fthh1pr5n60kVffWEmnDg255Np4d+u5p3bimCFtuW3MR/zk6unUq2dcOaoHZsa/Xl7GshXbePSZxTz6zGIAbrim946bEBIt+XKJau7ZCdTMHiU+AqI1sAIY7e5jk+3T76C+/trUl7MSTxTMWD0n7BBy
3sC21d4bE2DQoUOY/u7MPbpFsO+BPf2ih+9JadvR/QdPr6GqFmZWQLzQ/FJ3Py6Tle1TmdHXzOwsM/tN8L6jmR1S037ufrq7t3P3QndvX1NyE5H8ETNPaUnRZexcyKqysn034JXgPVUq2w8D7gqSY/VxpnDyu4DDgdOD95uAv6UauYhETxqFn5Mfx6w98D3g3oTVI4hXtCf4+8SE9ePcfbu7LwQqK9tXK5U+uEPdvb+ZzQRw93Vmpo4VkTrKLK1xpq3N7N2E92OCJycq3Qb8Ekh8mHKnyvZBUWiIV/Z7K2G7jFS2Lw2agQ5gZm2AuvuIuoikM11StZXtzaxyKOd0Mxuym6dNmmlTSXB/JV5xei8zu5H47CK/TmE/EYmoDE14OQg4wcyGE59rsqmZPURQ2T5ovWW3sr27P0y8CfkHYDlwors/kfaPIiKRkGr/W02tPHe/NrgB2Yn4zYNX3f0sarOyvZl1BLYCLySuc/fFNe0rItGU5SnLb6K2KtsTr6BVWXymPtAZmE/8Vq2I1EGZTnDuPhmYHLzOWGX7GhOcu38j8X0wy8hF1WwuIhFn5MYwrFSkPVTL3WeY2cHZCEZE8oDlz1CtVPrgrkx4GwP6A6uyFpGI5LwoteASH8ArI94n91R2whGRXFc5o28+SJrgggd8G7v7L2opHhHJA3nfgjOzeu5elmzqchGpm6JQ2f4d4v1ts8zseeAJYEvlh+7+dJZjE5EcZEQjwVVqCawhXoOh8nk4B5TgROqofCkbmCzB7RXcQZ3NV4mtUn6kbxHJiigUfi4AGrMbI/hFJLoqawbng2QJbrm7X19rkYhI3siT/JY0weXLzyAitSifygYmS3C7HOwqInVdWvUWQpWs8PPa2gxERPJHvlzeqS6qiKQtEkO1RESqivR0SSIisTx5TiRfErGI5AozLMUl+WGsvpm9Y2bvmdkcM7suWN/SzCaZ2cfB3y0S9rnWzBaY2Xwz+25NoSrBiUhaMlV0BtgOHOXufYC+wDAzO4xarmwvIrITS/FPMh63OXhbGCxOBivbK8GJSNoqh2vVtBBUtk9YRu58HCsws1nEa59Ocve3qVLZHkisbP95wu4ZqWwvIrKTWOpPwlVb2R4gKPvX18yaA8+YWe8kx8pKZXsRkR3i88Fl9i6qu683s8nE+9YyVtk+pxKcAYWWUyHllEPb9Ak7hJy3cNOisEPIadvLSzJynEzkNzNrA5QGya0B8G3gj3xV2f4mvl7Z/hEzuxXYh0xUthcRqaqmGwgpagc8ENwJjQGPu/u/zGwqtVjZXkRkJ5lowbn7+0C/Xayvvcr2IiKJUnkEJFcowYlIegwK8mSolhKciKQtP9KbEpyIpMmgxnGmuUIJTkTSlh/pTQlORHaDWnAiEln5kd6U4EQkTfGqWvmR4pTgRCRNeg5ORCIsTxpwSnAikj614EQkkhIms8x5SnAikja14EQksvKlbKASnIikRYWfRSTS8mUkQ74kYhHJGZmpjGpmHczsNTObFxR+vixYr8LPIhKeDBV+LgOucvcDgMOAnwbFnVX4WUTCYxZLaUnG3Ze7+4zg9SZgHvE6pyr8LCLhSaMFl7Tw847jmXUiXp9BhZ9FJDzx5JWZws8AZtYYeAq43N03JrmBkXbhZ7XgRCR9lcMZalpqPIwVEk9uD7v708HqFUHBZ/a08LMSnIikLRM3GSzeVBsLzHP3WxM+qiz8DF8v/HyamRWbWWdU+FlEMi/Fe6Q1GwScDXxgZrOCdb8iXtFehZ9FJASWmaFa7v4G1WdKFX4WkbDkx0gGJTgRSZtmExGRSMpYD1wtUIITkfTlyWB7JThg6cpN/OTmf7Ny7VZiMeOc4b246KQ+rNv4JRfeOJHFKzbSsW1Txv76uzRvUj/scENx6Z9f4eW3FtG6eQPeuOcMAD74ZBU/v30y20vKKSgwbv7ZEPr3bBtypLXnt3fM5PV3v6Bls2Ke+OtRO9aPe/FTHhv/KQUFMY44qC2Xn9uL9RtL
+OXN05izYB3HD+3INSO/GWLkeyp/is5k7Tm46mYKyEUFBTGuHzmIqWPPZMLtJzP2+feZ/9labn9sOoP7tWfa/WczuF97bn9sRtihhua0Y3ry2O+P32nddff8l1+cdQiT/34a15x7KL+9982QogvH8Ud14M7fHL7TumkfrGLyO8t57LahPPnXozhnRFcAiotiXHx6T644t1cYoWacpfgnbNl80Le6mQJyzt6tGtGnW3y4W5OGRXTv2JLlqzfz0tSFnHpMTwBOPaYn4//7aZhhhmrgN/elRZXWqxls2loCwMYtJezdqlEYoYXmoF6tadakaKd1T05YxHnf70ZRYXySi5bNiwFoUL8e/Q5sRVFR0skv8oaZpbSELWuXqMEg2coBs5vMrHKmgLnZOmcmLP5iIx8sWMVBPfdm1bqtO/7R7t2qEavXbws5utxy48VHcsq1zzN6zJtUuPPSbT8IO6TQfbZsMzPmruVvD8+jqLCAK37Ui17dWtS8Y94JP3mlolaGalWZKSBnbd5Wwo+uf4kbLz6SJo2Kat6hjvvnC7O5YdQRvP/Ij7hh1BFcduurYYcUuvJyZ9OWEh7442AuP7cXV9/yLu5Jx4PnpQzNB5d1WU9wVWcK2MXnIyunUlm9ek22w6lWaVk5513/Eicf1Z3jjtgfgDYtGvLFmi0AfLFmC62bNwgtvlw0btKHO76rEYO7MmP+ipAjCt9erRtw1GH7YGb07t6CmMH6jSVhh5VRlbOJ1PU+uOpmCtiJu49x9wHuPqB161bZDKda7s5lt75K944t+cnJ/XasH3ZYZx6b9CEAj036kGMP7xxKfLlq71aNePP9pQC8PmsJXfZpHm5AOWDoIXsz7f1VAHy2dDOlZRU0bxq1q4HU+t8i3QeXZKaAnPP2nOU8/u/5HNi5FUNGjQPgf84/jMtO688FN0zkoQlzab9XE+779bCQIw3Pj38/kTffX8raDV/yjTP+ydVnH8pfrhjKr+56nfKKCooL63Hr5UPDDrNWXfvnd5k+ZzXrN5Yw7MKJjDqtJyOO3o/f3jmTUy59lcLCGNdd2n/HP/TvjXyZLdvKKC2rYPI7y7lr9OF06dA05J9i9+RC6ywVlq3+ATM7Angd+ACoCFb/yt3HV7dP/4P6+utvqR9Hdt/iLZ/XvFEddsrQM5g9c84eZadv9Ovtz/7nyZS27drsgOk1TXiZTdm8i5pspgARyVepzWWZEzSSQUR2Q35kOCU4EUlbvvTBKcGJSFqM3LhDmgrVZBCRtGXqOTgzu8/MVprZ7IR1qmwvIuHJ4IO+9xOvUp9Ile1FJEQZGqvl7lOAtVVWq7K9iIQnjRZcSpXtq1BlexEJTyYr26d12q9TZXsRyZzKu6hZHIuqyvYiEp4szyaiyvYiEp5MPQVnZo8CQ4j31S0BRqPK9iISqgw96Ovup1fzkSrbi0g4NFRLRCLJgJgSnIhEUq4UXEiBEpyIpCk36i2kQglORNKWLwlOz8GJSGSpBSciacuX+eCU4EQkLbqLKiLRphaciEST7qKKSITlR3pTghOR3aAWnIhEl/rgRCSKdBdVRKJNLTgRiar8SG9KcCKSNj0mIiIRpgQnIpFklj9jUc09aVnBWmVmq4DPwo4jQWtgddhB5DB9PzXLte9oP3dvsycHMLMJxH+uVKx292F7cr49kVMJLteY2bsZLFobOfp+aqbvKFyaD05EIksJTkQiSwkuuTFhB5Dj9P3UTN9RiNQHJyKRpRaciESWEpyIRJYS3C6Y2TAzm29mC8zsmrDjyTVmdp+ZrTSz2WHHkovMrIOZvWZm88xsjpldFnZMdZX64KowswLgI+AYYAkwDTjd3eeGGlgOMbPBwGbgQXfvHXY8ucbM2gHt3H2GmTUBpgMn6neo9qkF93WHAAvc/VN3LwHGASNCjimnuPsUYG3YceQqd1/u7jOC15uAecC+4UZVNynBfd2+wOcJ75egX07ZTWbWCegHvB1yKHWSEtzX7WoUsa7jJW1m
1hh4Crjc3TeGHU9dpAT3dUuADgnv2wPLQopF8pSZFRJPbg+7+9Nhx1NXKcF93TSgm5l1NrMi4DTg+ZBjkjxi8bmExgLz3P3WsOOpy5TgqnD3MuASYCLxzuHH3X1OuFHlFjN7FJgK9DCzJWZ2Qdgx5ZhBwNnAUWY2K1iGhx1UXaTHREQkstSCE5HIUoITkchSghORyFKCE5HIUoITkchSgssjZlYePHIw28yeMLOGe3Cs+83s5OD1vWZ2YJJth5jZwN04xyIz+1r1perWV9lmc5rn+q2Z/TzdGCXalODyyzZ37xvM4FECjEr8MJgJJW3ufmENM10MAdJOcCJhU4LLX68DXYPW1Wtm9gjwgZkVmNnNZjbNzN43s4sg/nS9md1pZnPN7EVgr8oDmdlkMxsQvB5mZjPM7D0zeyUYLD4KuCJoPR5pZm3M7KngHNPMbFCwbysze9nMZprZP9j1uN6dmNmzZjY9mDdtZJXP/hzE8oqZtQnW7W9mE4J9Xjeznhn5NiWSVNk+D5lZPeBYYEKw6hCgt7svDJLEBnc/2MyKgTfN7GXiM1r0AL4BtAXmAvdVOW4b4B5gcHCslu6+1sz+Dmx291uC7R4B/uLub5hZR+KjPg4ARgNvuPv1ZvY9YKeEVY3zg3M0AKaZ2VPuvgZoBMxw96vM7DfBsS8hXsRllLt/bGaHAncBR+3G1yh1gBJcfmlgZrOC168TH+84EHjH3RcG678DfLOyfw1oBnQDBgOPuns5sMzMXt3F8Q8DplQey92rm/Pt28CB8SGXADQNJnYcDHw/2PdFM1uXws90qZmdFLzuEMS6BqgAHgvWPwQ8HczOMRB4IuHcxSmcQ+ooJbj8ss3d+yauCP6hb0lcBfzM3SdW2W44NU/7ZClsA/GujcPdfdsuYkl57J+ZDSGeLA93961mNhmoX83mHpx3fdXvQKQ66oOLnonAxcF0PZhZdzNrBEwBTgv66NoBQ3ex71TgW2bWOdi3ZbB+E9AkYbuXiV8uEmzXN3g5BTgzWHcs0KKGWJsB64Lk1pN4C7JSDKhshZ5B/NJ3I7DQzE4JzmFm1qeGc0gdpgQXPfcS71+bYfGiMP8g3lJ/BvgY+AC4G/hP1R3dfRXxfrOnzew9vrpEfAE4qfImA3ApMCC4iTGXr+7mXgcMNrMZxC+VF9cQ6wSgnpm9D/wOeCvhsy1ALzObTryP7fpg/ZnABUF8c9B08pKEZhMRkchSC05EIksJTkQiSwlORCJLCU5EIksJTkQiSwlORCJLCU5EIuv/AS70wjYAB1+lAAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<Figure size 432x288 with 2 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "# 使用更强的分类器\n",
    "\n",
    "# In[16]:\n",
    "\n",
    "\n",
    "from sklearn.ensemble import RandomForestClassifier\n",
    "\n",
    "# Random-forest search space: forest size x depth (15..19) x split criterion,\n",
    "# always with balanced class weights and a fixed seed\n",
    "params = {\n",
    "    \"n_estimators\": [100, 200, 500],\n",
    "    \"max_depth\": range(15, 20),\n",
    "    \"criterion\": [\"entropy\", \"gini\"],\n",
    "    \"class_weight\": [\"balanced\"],\n",
    "    \"random_state\": [2021],\n",
    "}\n",
    "# 8-fold cross-validation scored by macro-averaged F1, using 5 parallel jobs\n",
    "clf = GridSearchCV(\n",
    "    estimator=RandomForestClassifier(),\n",
    "    param_grid=params,\n",
    "    cv=8,\n",
    "    n_jobs=5,\n",
    "    scoring=\"f1_macro\",\n",
    ")\n",
    "clf.fit(X_train, y_train)\n",
    "print(\"Best set score:{:.2f}\".format(clf.best_score_))\n",
    "print(\"Best parameters:{}\".format(clf.best_params_))\n",
    "print(\"Test set score:{:.2f}\".format(clf.score(X_test, y_test)))\n",
    "eval(clf, X_train, X_test, y_train, y_test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {
    "pycharm": {
     "is_executing": true
    }
   },
   "outputs": [],
   "source": [
    "# 特征选取对分类的影响\n",
    "\n",
    "# In[17]:\n",
    "\n",
    "\n",
    "def get_feature2(df, train_mode=True):\n",
    "    \"\"\"Build a 20-value feature vector (and optionally a label) from one track.\n",
    "\n",
    "    Besides the raw columns, a derived series ``dis = sqrt(x**2 + y**2)`` (the\n",
    "    distance of each point from the coordinate origin) is summarised.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    df : pandas.DataFrame\n",
    "        Must contain the columns 'x', 'y', '速度' and '方向', plus 'type' when\n",
    "        ``train_mode`` is true. The frame is not modified.\n",
    "    train_mode : bool, default True\n",
    "        When true, the label is read from the 'type' column.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    features : numpy.ndarray\n",
    "        In order: x (std, mean, max, min), y (std, mean, max, min),\n",
    "        速度 (mean, std, max, min), 方向 (mean, std, max, min),\n",
    "        dis (mean, std, max, min).\n",
    "    label : numpy.ndarray or None\n",
    "        0-d array with the encoded 'type' of the last input row\n",
    "        (拖网 -> 0, 围网 -> 1, 刺网 -> 2; NaN for any other value), or None\n",
    "        when ``train_mode`` is false.\n",
    "    \"\"\"\n",
    "    # Reversed selection of the input. Nothing is assigned into it: derived values\n",
    "    # live in local Series, so the caller's frame is untouched and pandas has no\n",
    "    # reason to emit SettingWithCopyWarning.\n",
    "    ordered = df.iloc[::-1]\n",
    "    if train_mode:\n",
    "        # Encode the whole column, then take the first row of the reversed data\n",
    "        # (i.e. the last row of the input) as the track's label.\n",
    "        encoded = ordered['type'].map({'拖网': 0, '围网': 1, '刺网': 2})\n",
    "        label = np.array(encoded.iloc[0])\n",
    "    else:\n",
    "        label = None\n",
    "    # Extra distance feature: Euclidean norm of (x, y)\n",
    "    dis = np.sqrt(ordered['x'] ** 2 + ordered['y'] ** 2)\n",
    "    features = np.array([\n",
    "        ordered['x'].std(), ordered['x'].mean(), ordered['x'].max(), ordered['x'].min(),\n",
    "        ordered['y'].std(), ordered['y'].mean(), ordered['y'].max(), ordered['y'].min(),\n",
    "        ordered['速度'].mean(), ordered['速度'].std(), ordered['速度'].max(), ordered['速度'].min(),\n",
    "        ordered['方向'].mean(), ordered['方向'].std(), ordered['方向'].max(), ordered['方向'].min(),\n",
    "        dis.mean(), dis.std(), dis.max(), dis.min(),\n",
    "    ])\n",
    "    return features, label"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {
    "pycharm": {
     "is_executing": true
    },
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0\n",
      "1000\n",
      "2000\n",
      "3000\n",
      "4000\n",
      "5000\n",
      "6000\n",
      "Best set score:0.80\n",
      "Best parameters:{'class_weight': 'balanced', 'criterion': 'entropy', 'max_depth': 13, 'random_state': 2021}\n",
      "Test set score:0.81\n",
      "训练集准确率 0.9671428571428572\n",
      "训练集f1_score 0.962138702668638\n",
      "测试集准确率 0.8464285714285714\n",
      "测试集f1_score 0.8073563241670992\n"
     ]
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAATgAAAEGCAYAAADxD4m3AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8QVMy6AAAACXBIWXMAAAsTAAALEwEAmpwYAAAjRklEQVR4nO3de5gU1ZnH8e9vBhguglwGELkI4oiCUUTEC4qANzQmYCKKMYZNyCJqRE2yCWgSE7NEs5oEjZINXjZko+jESyQxARFF1EWRqwoEQVEYQW5KuAszvPtH12ALMz1d0D3VXfN+fOrprtPVp176gddTdeqcIzPDOefiqCDqAJxzLls8wTnnYssTnHMutjzBOediyxOccy626kUdQDLVb2Jq2DzqMHLWiccWRx1CzhOKOoSctuqD1WzauOmQfqTCliVme3akdaxtWzPNzAYdyvkORW4luIbNKep9TdRh5Kzp/xgRdQg5r6iwKOoQcto5p593yHXYnh1p/zvdNfO2SP+vnFMJzjmXBwQoP1rKnuCcc+EVFkYdQVo8wTnnQpK34JxzMSVA+fEAhic451x4Bd6Cc87FlV+iOufiSX6J6pyLKQGFnuCcc7HkLTjnXJz5PTjnXCz5YyLOuVjzx0Scc7EkQYEP1XLOxZXfg3POxZZfojrn4skfE3HOxZXPB+ecizVPcM65eJJPeOmci6k8ukTNjzuFzrkcEnQypLOlqkXqJmlh0rZF0k2SWkqaLml58Noi6TtjJa2QtEzShTVF6gnOORdegdLbUjCzZWbW08x6AqcAO4CngTHADDMrAWYE+0jqDgwDegCDgAmSUl4re4JzzoUnpbel71zgXTP7ABgMTArKJwFDgveDgcfM7FMzWwmsAPqkqtTvwTnnwgk3VKtY0tyk/YlmNrGK44YBk4P3bc1sLYCZrZXUJihvD7yW9J2yoKxanuCcc+GlP5Jho5n1TnWApAbAl4GxNdRV1Ukt1Rf8EtU5F14GOhmSXATMN7N1wf46Se0Agtf1QXkZ0DHpex2ANakq9gTnnAun8jGRzN2Du5LPLk8BpgDDg/fDgWeSyodJKpLUBSgB5qSq2C9RnXMhCWXoOThJjYHzgWuSiu8ESiWNAFYBQwHMbLGkUmAJUA5cb2YVqer3BOecCyWTz/ma2Q6g1X5lm0j0qlZ1/DhgXLr1e4JzzoUjKCzMj5EMdTbBHdOhOQ//6OJ9+0e1a8Ydk17jlUVl/PqmgTSsX4/yir18/94Xmb9sHUMHduOGy0/Zd3yPo4s559pHefvdjVGEX+veLdvM9Xe8uG9/1dqtfPfqXlx2XgnX3fECZeu20aHtYUwYO5DmTYsijDRaFRV7ueA7T3BEcRMe+fkX+WTLLkaOe47V67bSsW1THvjRBTRv2jDqMA9Zpi5Rsy2rnQySBgVDKlZIGpPNc4W1omwz/UY9Sr9Rj9L/usns/LScZ195l5/9+1n81x9fp9+oR7lj0mv8bORZAPz5hWX7jh/1y2msWrelziQ3gK4dmjP1/kuZev+lPHvvYBo1rMegM4/i/tJF9O15JLMeGkrfnkcyoXRR1KFG6oGn36Sk076RRfz28fmcfXIHXvvDVZx9cgd++/iCCKPLjMz3MWRP1hJcMITifhJdwN2BK4OhFjnnnJM78v6af7F6/VYMaNqkAQDNmjTgo03bDzj+qwO68eQLy2o5ytzx6sI1dGrXlA5tmzJ99iouO68EgMvOK+G52asiji46azZsY/qcD7hq0PH7yqbOfp8rzu8GwBXnd+Mf/7cyqvAySlJaW9SyeYnaB1hhZu8BSHqMxFCLJVk850H5yoBjefLFRMK6ZcJLPHnnpfx85NmoQAwaXXrA8Zf2L+Gqn/yttsPMGVNeeo/B53QFYOPmnbRt2RiAti0bs/FfO6MMLVI//t0r/OTbZ7Bt5559ZRs+2UHbVk0AaNuqCRs3x+P3yYXklY5sXqK2B1Yn7dc4rCIK9esV
cNEZR/OXl1YA8K0vncgtv5vFCV97mFt/N4t7v3/e544/5bi27Py0nKXvb4oi3Mjt3lPB9NdX8cWzu0QdSk557rX3KW7eiJOObVPzwfkuzcvTXMiB2WzBpTWsQtJIYCQARYdnMZyqndenM4uWr2fD5h0AXHnB8Yy5/yUA/vLScu757ud7q78yoBtPvvBOrceZK2bOLeOErq1o3aIRAMXNG7Hu4x20bdmYdR/voPjwRhFHGI05i9cy7bX3mfHGKnbtLmfbjj1cd+d0WrdozLpN22nbqgnrNm2nuHn+/z5CFORJL2o2W3BpDasws4lm1tvMeqt+kyyGU7XLBhzLky9+lrDWbtxO35MSDc1+J3fkvQ837/tMgsH9juHJmXX3/tszM99lcP+u+/bPP70TTzy/HIAnnl/O+Wd0iiq0SP1oxBksfHQ4c//3an5/ywX07dmeCWPO58LTO/P49MTfl8enL2PQGZ2jDTRD/B4cvAGUBEMqPiQxW8DXsni+0BoV1aP/KZ24efwL+8pu+s0M7riuH/UKC9i1u4KbfvPZZ2ee2J41G7fxwdotUYQbuZ27ynl5wRruGH3WvrLrLj+Ra3/xAo9Pe4cjWzfhv2+t8vnMOuuGYb349/+cxqNTl9K+zWE8+KMa52jMfTly+ZkOmaUcjH9olUsXA+OBQuDh4CnkahU0bW9Fva9JdUidtuofI6IOIecVFdbdZ/DScc7p57Fg3sJDSk/125VY8b+NT+vYj+68ZF5Ns4lkU1Yf9DWzvwN/z+Y5nHO1K/EcXH404ersSAbn3MEr8JXtnXOxlEf34DzBOedCEvIWnHMujvJoWVRPcM658LyTwTkXT34PzjkXZwUF+bGciyc451woIsyqgdHKjzTsnMsdAhUora3GqqTmkp6Q9E9JSyWdIamlpOmSlgevLZKOHxtMoLtMUo3j3jzBOedCy+B0SfcAU83sOOAkYCkwBphhZiXAjGCfYMLcYUAPYBAwIZhYt1qe4JxzIaU3k0hNPa2SmgH9gIcAzGy3mW0mMTHupOCwScCQ4P1g4DEz+9TMVgIrSEysWy1PcM65UEKuyVAsaW7SNjKpqqOBDcD/SFog6UFJTYC2ZrYWIHitnEU09CS63sngnAtHoXpRN6aYTaQe0Au4wcxel3QPweVo9Wc+QMrpkLwF55wLLUP34MqAMjN7Pdh/gkTCWyepXeI8agesTzq+xkl0k3mCc86FloleVDP7CFgtqVtQdC6JRammAMODsuHAM8H7KcAwSUXBRLolwJxU5/BLVOdcKBkei3oD8IikBsB7wDdJNLxKJY0AVgFDAcxssaRSEkmwHLjezCpSVe4JzjkXjqAgQxnOzBYCVd2jq3Lu+2BW8JQzgyfzBOecC0k+4aVzLp4EPh+ccy6m5NMlOediLE/ymyc451x43oJzzsWW34NzzsWS5L2ozrkYy9RzcNnmCc45F1qe5DdPcM65cCS/B+ecizHvRXXOxVae5DdPcM65kCQKCvNjpjVPcM65UDI8XVJW5VSCO+nY1rw4bVTUYeSstz5eGnUIOe/U1j2jDiGnZerxDr8H55yLLU9wzrl4Uv6sbO8JzjkXisA7GZxz8ZUnV6i+qpZzLqQ0V7VP5z6dpPclvSVpoaS5QVlLSdMlLQ9eWyQdP1bSCknLJF1YU/2e4JxzoWVi2cAkA8ysZ9IC0WOAGWZWAswI9pHUHRgG9AAGARMkFaaq2BOccy6UyufgMrDwc3UGA5OC95OAIUnlj5nZp2a2ElgB9ElVkSc451xombpEBQx4TtI8SSODsrZmthYgeG0TlLcHVid9tywoq5Z3MjjnwhEUpn/5WVx5by0w0cwmJu33NbM1ktoA0yX9M/WZD2CpTu4JzjkXSuISNWVeSbYx6d7aAcxsTfC6XtLTJC4510lqZ2ZrJbUD1geHlwEdk77eAViT6uR+ieqcCy0T9+AkNZHUtPI9cAHwNjAFGB4cNhx4Jng/BRgmqUhSF6AEmJPqHN6Cc86F
VpB+Cy6VtsDTwb26esCjZjZV0htAqaQRwCpgKICZLZZUCiwByoHrzawi1Qk8wTnnQhFV3wwLy8zeA06qonwTcG413xkHjEv3HNUmOEm/JcUNPDMbne5JnHMxIigsyEgLLutSteDmpvjMOVeH5ctQrWoTnJlNSt6X1MTMtmc/JOdcLhOWqXtwWVdjL6qkMyQtAZYG+ydJmpD1yJxzOUtpblFL5zGR8cCFwCYAM1sE9MtiTM65HFcgS2uLWlq9qGa2er9hFym7Zp1z8XWI40xrVToJbrWkMwGT1AAYTXC56pyrmwpzoHWWjnQuUUcB15MY1Poh0DPYd87VUZKltUWtxhacmW0ErqqFWJxzeUDkz5oM6fSiHi3pr5I2SFov6RlJR9dGcM653JQvLbh0LlEfBUqBdsCRwJ+BydkMyjmXw4JVtdLZopZOgpOZ/a+ZlQfbn6hhDibnXHwJS3uLWqqxqC2Dty9KGgM8RiKxXQE8WwuxOedyVBzGos4jkdAqG5rXJH1mwM+zFZRzLrfl/XNwZtalNgNxzuWHRC9q/rfg9pF0AtAdaFhZZmZ/zFZQzrnclicNuJoTnKTbgP4kEtzfgYuAVwBPcM7VRXk0VCudXtTLSMyu+ZGZfZPEDJxFWY3KOZezRKKTIZ0taulcou40s72SyiU1I7HCTewe9D31G3/isEYNKCwQhYUFTLvvq3yyZRejfjGd1eu20rFtU35/6wU0b1o3cvv6Tbu4a8I/+XjzHgoEF5/bjksv6sAfSlcye+4mVADNmzXgP0Z1o1XLxG/y3gfbuOehd9ixowIViPv+sxcNGtSddY2+c9d0pr2+kuLmjZn94NcBeGvFBr47/gV27SmnXmEBd48ewCnHHRFxpIeuIAceAUlHOglurqTmwAMkela3UcNKNgCSHgYuAdab2QmHEmRteeK/vkSrwxvt27+vdAFnndyBG644md8+voD7Hl/Aj759eoQR1p7CAjHy610p6dKUHTvLuf6W+fT6QguGXtKRf7s80f/09NQy/vTUB9z47WOpqDB+ef8/+cH1x9H1qMPYsnUPhfXy5DomQ668sDv/PuQkRv3yuX1ltz3wCj/4xmmc36czz72+ktsmvsLffn1ZhFFmRmwuUc3sOjPbbGb/DZwPDA8uVWvyB2DQIcYXqWmz3+fy844F4PLzjmXq7JURR1R7WrUooqRLUwAaN6pHp/aN2fjxpzRp/Nn/E3ft2rvvL/q8Nz+mS6cmdD3qMACaNa0fZnHgWOh7YntaNG34uTIBW7fvBmDL9t0c0apJBJFllkhvmFa6Q7UkFUpaIOlvwX5LSdMlLQ9eWyQdO1bSCknLJF1YU92pHvTtleozM5ufqmIzmyWpc00B5Aohht3yLAKu/mJ3rr64Oxs+2Unb4C9k21ZN2Lh5Z7RBRuSjDbtY8f42jjumGQD/8/hKps9aR5PGhdz148SiSGVrdyLB2Dve5F9b9tD/jNZc/uVOUYadE35x3Tl8dczT/Hjiy9heY+q9l0cd0qHL/DCsG0lMwdYs2B8DzDCzO4NBBmOAH0rqDgwDepAYNvq8pGNTLR2Y6hL1Vyk+M2BgiD9AtSSNBEYCdOjUIRNVHpQpvxnCEUESu2LM3zimY/PIYsklO3dVcPtvFnPtN7rua71984oufPOKLkz+yyqmTFvDN4Z2pmKv8fayLdz3n70oKirgh+MWUXJ0U04+oUUNZ4i3h//6Jr+4th9f7lfC0zPfYfTdz/OXu74SdViHLFMD6SV1AL5IYinA7wbFg0k8uQEwCZgJ/DAof8zMPgVWSloB9AFmV1d/tZeoZjYgxZaR5BacZ6KZ9Taz3sXFrTJVbWiVlw7FzRtxUd/OLPznelq3aMS6TYl1dtZt2k5x80apqoid8vK93P6bxQzs24az+rQ+4POBfdvw8pwNABS3LOLE4w/n8Gb1aVhUyKk9W7F85bbaDjnnTH5uKV86+xgAhpxTwvxl6yKO6NCJxISX6WxAsaS5SdvI/aob
D/wA2JtU1tbM1gIEr22C8vbA6qTjyoKyatWdLq4Uduzaw7Ydu/e9f2leGd06t+SC0ztT+vw7AJQ+/w4XntE5wihrl5nx64nv0OnIxlz2xY77yj9cu2Pf+9nzNtHxyMYA9D6xBStXbWfXpxVUVBhvLd3MUe0b13rcuaZdcRNeXfQhALMWrObo9s2jDShDCtLcgI2VDZhgm1hZh6TKTsh5aZ62qgvjlE1JX9ke2PDJTr71s2kAlFfs5dIBxzDw1E707NaGa8ZNZ/LUpbRv05SJt54fcaS1Z/GyLTz/8jq6dGzCqDGJJXK/dUUXps78iNVrdlAg0aZ1ETeOSHTCND2sPl+5uAM33DofBH16tuS0XtG1yKMwYtw/eHVRGZv+tYsewx5izPDTGH/zuYydMIvyir00bFDI+JszdvETqQxdovYFvizpYhKjpJpJ+hOwTlI7M1srqR2JR9Mg0WLrmPT9DsCalHGaZed5FkmTSVxHFwPrgNvM7KFU3zn5lJ724uznUh1Sp731sS+FUZNTW/eMOoSc1ve0/sybu+CQugjadz/OrnnkgbSOva1Xv3lm1rum4yT1B75vZpdIugvYlNTJ0NLMfiCpB4n5KfuQ6GSYAZQcbCdD5YlFYsryo83sdkmdgCPMLOWzcGZ2ZU11O+fyU5YH298JlEoaAawChgKY2WJJpcASoBy4PlVyg/QuUSeQuAE4ELgd2Ao8CZx60OE75/Japp9wNLOZJHpLMbNNJIaHVnXcOBI9rmlJJ8GdZma9JC0ITvBJsHygc64OknJjnGk60klweyQVEvRWSGrN57t0nXN1TL6MUUknwd0LPA20kTSOxOwiP8pqVM65nBabCS/N7BFJ80hcEwsYYmbenedcHSVi1IILek13AH9NLjOzVdkMzDmXu2LTgiOxglbl4jMNgS7AMhIDXp1zdVBsEpyZfSF5P5hl5JpqDnfOxZzInzGeoYdqmdl8Sf4MnHN1lTI3m0i2pXMP7rtJuwVAL2BD1iJyzuW8OLXgmia9LydxT+7J7ITjnMt1lTP65oOUCS54wPcwM/uPWorHOZcH8r4FJ6memZWnmrrcOVc3xaEXdQ6J+20LJU0B/gxsr/zQzJ7KcmzOuRwk4pHgKrUENpGYTaTyeTgDPME5V0fly7KBqRJcm6AH9W0+S2yV8iN9O+eyIg4LPxcCh3EQ86A75+JLikcLbq2Z3V5rkTjn8kae5LeUCS5f/gzOuVpUuWxgPkiV4KqcMtg5V9dZ3vSiplr4+ePaDMQ5lz+U5payDqmhpDmSFklaLOlnQXlLSdMlLQ9eWyR9Z6ykFZKWSbqwpjjz5YFk51wOkSytrQafAgPN7CSgJzBI0unAGGCGmZWQWBpwTOKc6g4MIzFV2yBgQjDaqlqe4JxzoVROl5TmyvbVsoRtwW79YDNgMDApKJ8EDAneDwYeM7NPzWwlsILEGqnV8gTnnAutQEprA4olzU3aRibXI6lQ0kISq9dPN7PXgbZmthYgeG0THN4eWJ309bKgrFqh54NzztVxEkr/QbiNqVa2DxZu7impOfC0pBNSnbmqKlKd3FtwzrlQ0u1gCPOcmZltJrHw8yBgnaR2AMHr+uCwMqBj0tc6AGtS1esJzjkXmtL8L2UdUuug5YakRsB5wD+BKcDw4LDhwDPB+ynAMElFkroAJSQmBamWX6I650LL0FCtdsCkoCe0ACg1s79Jmg2UShoBrAKGApjZYkmlwBISk+9eH1ziVssTnHMutIIMDHQyszeBk6so30Q1Aw3MbBwwLt1zeIJzzoWSmA8uP0Zy5lSCK5BoWFgUdRg569TWPaMOIeet3Pp+1CHktE8rdmeknjzJb7mV4Jxz+aGmDoRc4QnOOReat+Ccc7GUziMgucITnHMuHEFhnjThPME550LLj/TmCc45F5IgzFjUSHmCc86Flh/pzROcc+4geAvOORdb+ZHePME550JKrKqVHynOE5xzLiR/Ds45F2N50oDzBOecC89bcM65
WJK8BeecizFvwTnnYssnvHTOxVLlws/5IF/idM7lEAVro9a01VBHR0kvSloqabGkG4PylpKmS1oevLZI+s5YSSskLZN0YU1xeoJzzoWUsZVRy4HvmdnxwOnA9ZK6A2OAGWZWAswI9gk+Gwb0ILF+6oRgRa5qeYJzzoWWifRmZmvNbH7wfiuwFGgPDAYmBYdNAoYE7wcDj5nZp2a2ElgB9El1Dr8H55wLTUq7bVQsaW7S/kQzm3hgfepMYgnB14G2ZrYWEklQUpvgsPbAa0lfKwvKquUJzjkXWog+1I1m1jtlXdJhwJPATWa2JcW9u6o+sFR1+yWqcy6UxOVnev/VWJdUn0Rye8TMngqK10lqF3zeDlgflJcBHZO+3gFYk6p+T3DOufAqhzPUtKWsQgIeApaa2a+TPpoCDA/eDweeSSofJqlIUhegBJiT6hx+ieqcCy1Dj/n2Ba4G3pK0MCi7BbgTKJU0AlgFDAUws8WSSoElJHpgrzezilQn8ATnnAsprUdAamRmr6So6NxqvjMOGJfuOTzBOefCkQ/Vcs7Fmic451xM+WwizrlYyswduNrhCc45F57fg8sv37lrOtNeX0lx88bMfvDrALy1YgPfHf8Cu/aUU6+wgLtHD+CU446IONJoVPn7vLuB741/gW0799DpiGZMHHshzZoURRxp7fnpbxfw8tyPaHl4EX++d+C+8seefY/H//4ehYUFnHVKW24a3oPXFq7n3v9dQnn5XurVK+Cm4T3oc2LrCKM/FPmz6EzWHvStbiqUXHXlhd154o4hnyu77YFX+ME3TuPl31/F2OGnc9vEV6IJLgdU9fvc+Kvnue3bffm/B7/OJX278tvS+dEEF5EvDezIfT8543Nlb7y1gZlz1vL4+AE8ce9AvjH4GACaN2vAPbeeRuk9A7l9dC9+fE9+/1aZGsmQbdkcyVDdVCg5qe+J7WnRtOHnygRs3b4bgC3bd3NEqyYRRJYbqvp9VpRt5swTE2Od+5/Sib++vCKK0CJzSo9iDm/a4HNlT0x9n29+pYQG9ROz+LRsnmjRHnd0c1q3bARA105N2b27gt17Uj6jmtMyMR9cbcjaJWowG0DljABbJVVOhbIkW+fMtF9cdw5fHfM0P574MrbXmHrv5VGHlFOO69yKf/zfe1zctyvPzFrOhxu2Rh1S5D5Ys435Sz7m/keW0qB+ITf/Ww96lLT43DEzZq+l29GH70uC+Sn65JWOWhmLut9UKHnj4b++yS+u7cfiySMYd20/Rt/9fNQh5ZT7vn8eD055k/7XTmbbjt3Ur5fP/2Azo6LC2Lp9N5N+2Y+bhvfgh3fPxeyzCS/eXbWFe/+4mFtH9YwuyAzIyHSXtSDrCW7/qVCq+HykpLmS5m7YsCnb4YQy+bmlfOnsxD2UIeeUMH/Zuogjyi3HdmrJU7+8lJm/u5KvDuxGlyMPjzqkyLUpbsTA049EEicc24ICweYtidsc6zbu5Ht3zuH2G3vRsV3+3u7I5Gwi2ZbVBFfNVCifY2YTzay3mfVu3bpVNsMJrV1xE15d9CEAsxas5uj2zaMNKMds+GQHAHv3Gnf/aQ7fvOQLEUcUvQF9juCNNzcA8MGH29hTvpfmzRqwdfseRo97jRuu7k7P43Pr73l46d1/i/U9uBRToeSkEeP+wauLytj0r130GPYQY4afxvibz2XshFmUV+ylYYNCxt88sOaKYqqq32f7zj08+MybAFxyVleuGpSzfUhZMfZXc5m3eCObt+xm0LenMWrYcQw+9yh+et8Cho5+gfr1C/jZ6F5I4vG/v8fqtdt5oHQZD5QuA2DCbWfu64TIN7nQOkuHku8PZLRi6SzgZeAtYG9QfIuZ/b2675zS+2R79fWZWYnH1Q0rt74fdQg5beiAr/H2gsWHlJ2+cPIJ9peXnkjr2GMOP35eTTP6ZlM2e1FTTYXinMtXNc9lmTN8JINz7iDkR4bzBOecCy1f7sF5gnPOhSJyo4c0HZ7gnHOh5UsL
zlfVcs6FlsFlAx+WtF7S20llLSVNl7Q8eG2R9NlYSSskLZN0YU31e4JzzoWXubFafwAG7Vc2BphhZiXAjGCfYLKOYUCP4DsTJKUcH+gJzjkXWqZacGY2C/h4v+LBwKTg/SRgSFL5Y2b2qZmtBFYAfVLV7wnOORdalseitg1mI6qclahNUN4eWJ10XFlQVi3vZHDOhRKyF7VY0tyk/YlmNvGgT32glEOxPME550IL0TrbeBBDtdZJamdmayW1A9YH5WVAx6TjOgBrUlXkl6jOudCyPB/cFGB48H448ExS+TBJRZK6ACXAnFQVeQvOORdehh70lTQZ6E/iUrYMuA24EyiVNAJYBQwFMLPFkkpJzApeDlxvZinnffcE55wLLVMP+prZldV8dG41x48DxqVbvyc451woAgryZCSDJzjnXDi5suBCGjzBOedCyo31FtLhCc45F1q+JDh/TMQ5F1vegnPOhebzwTnnYsl7UZ1z8eYtOOdcPHkvqnMuxvIjvXmCc84dBG/BOefiy+/BOefiyHtRnXPx5i0451xc5Ud68wTnnAvNHxNxzsWYJzjnXCxJ+TMWVWYpV92qVZI2AB9EHUeSYmBj1EHkMP99apZrv9FRZtb6UCqQNJXEnysdG81s/5Xra01OJbhcI2nuQSx5Vmf471Mz/42i5fPBOediyxOccy62PMGlNjHqAHKc/z41898oQn4PzjkXW96Cc87Flic451xseYKrgqRBkpZJWiFpTNTx5BpJD0taL+ntqGPJRZI6SnpR0lJJiyXdGHVMdZXfg9uPpELgHeB8oAx4A7jSzJZEGlgOkdQP2Ab80cxOiDqeXCOpHdDOzOZLagrMA4b436Ha5y24A/UBVpjZe2a2G3gMGBxxTDnFzGYBH0cdR64ys7VmNj94vxVYCrSPNqq6yRPcgdoDq5P2y/C/nO4gSeoMnAy8HnEodZInuANVNYrYr+NdaJIOA54EbjKzLVHHUxd5gjtQGdAxab8DsCaiWFyeklSfRHJ7xMyeijqeusoT3IHeAEokdZHUABgGTIk4JpdHlJhL6CFgqZn9Oup46jJPcPsxs3LgO8A0EjeHS81scbRR5RZJk4HZQDdJZZJGRB1TjukLXA0MlLQw2C6OOqi6yB8Tcc7FlrfgnHOx5QnOORdbnuCcc7HlCc45F1ue4JxzseUJLo9IqggeOXhb0p8lNT6Euv4g6bLg/YOSuqc4tr+kMw/iHO9LOmD1perK9ztmW8hz/VTS98PG6OLNE1x+2WlmPYMZPHYDo5I/DGZCCc3Mvl3DTBf9gdAJzrmoeYLLXy8DxwStqxclPQq8JalQ0l2S3pD0pqRrIPF0vaT7JC2R9CzQprIiSTMl9Q7eD5I0X9IiSTOCweKjgJuD1uPZklpLejI4xxuS+gbfbSXpOUkLJP2eqsf1fo6kv0iaF8ybNnK/z34VxDJDUuugrKukqcF3XpZ0XEZ+TRdLvrJ9HpJUD7gImBoU9QFOMLOVQZL4l5mdKqkIeFXScyRmtOgGfAFoCywBHt6v3tbAA0C/oK6WZvaxpP8GtpnZ3cFxjwK/MbNXJHUiMerjeOA24BUzu13SF4HPJaxqfCs4RyPgDUlPmtkmoAkw38y+J+knQd3fIbGIyygzWy7pNGACMPAgfkZXB3iCyy+NJC0M3r9MYrzjmcAcM1sZlF8AnFh5fw04HCgB+gGTzawCWCPphSrqPx2YVVmXmVU359t5QPfEkEsAmgUTO/YDvhJ891lJn6TxZxot6dLgfccg1k3AXuDxoPxPwFPB7BxnAn9OOndRGudwdZQnuPyy08x6JhcE/9C3JxcBN5jZtP2Ou5iap31SGsdA4tbGGWa2s4pY0h77J6k/iWR5hpntkDQTaFjN4Racd/P+v4Fz1fF7cPEzDbg2mK4HScdKagLMAoYF9+jaAQOq+O5s4BxJXYLvtgzKtwJNk457jsTlIsFxPYO3s4CrgrKLgBY1xHo48EmQ3I4j0YKsVABUtkK/
RuLSdwuwUtLQ4BySdFIN53B1mCe4+HmQxP21+UosCvN7Ei31p4HlwFvA74CX9v+imW0gcd/sKUmL+OwS8a/ApZWdDMBooHfQibGEz3pzfwb0kzSfxKXyqhpinQrUk/Qm8HPgtaTPtgM9JM0jcY/t9qD8KmBEEN9ifDp5l4LPJuKciy1vwTnnYssTnHMutjzBOediyxOccy62PME552LLE5xzLrY8wTnnYuv/AXpdR5dgtNXeAAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<Figure size 432x288 with 2 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "def load_data2():\n",
    "    path = './hy_round1_train_20200102'\n",
    "    train_file = os.listdir(path)\n",
    "    X = []\n",
    "    Y = []\n",
    "    for i, each in enumerate(train_file):\n",
    "        if not i % 1000:  #每读1000个文件输出一次\n",
    "            print(i)\n",
    "        each_path = os.path.join(path, each)\n",
    "        df = pd.read_csv(each_path)\n",
    "        x, y = get_feature2(df)\n",
    "        X.append(x)\n",
    "        Y.append(y)\n",
    "    X = np.array(X)\n",
    "    Y = np.array(Y)\n",
    "    return X, Y\n",
    "\n",
    "X,Y= load_data2()\n",
    "\n",
    "\n",
    "# In[18]:\n",
    "\n",
    "\n",
    "X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=0)\n",
    "\n",
    "params = {'max_depth':range(5,20),'criterion':np.array(['entropy','gini']),\"class_weight\":['balanced'],\"random_state\":[2021]}\n",
    "clf = GridSearchCV(estimator=tree.DecisionTreeClassifier(), param_grid=params, cv=8, n_jobs=5, scoring=\"f1_macro\")\n",
    "clf.fit(X_train,y_train)\n",
    "print(\"Best set score:{:.2f}\".format(clf.best_score_))\n",
    "print(\"Best parameters:{}\".format(clf.best_params_))\n",
    "print(\"Test set score:{:.2f}\".format(clf.score(X_test, y_test)))\n",
    "eval(clf,X_train, X_test, y_train, y_test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false,
    "jupyter": {
     "outputs_hidden": false
    },
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
